From e58786e1921174135f908b1dc2d563f64498f1da Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 7 Dec 2015 10:23:32 +1100 Subject: [PATCH 001/218] Use add_edge_if_not_present in somMayGoBackwards() As somMayGoBackwards() operates on a copy of the graph where virtual starts have been collapsed on to startDs, we need to be careful not to create parallel edges. --- src/nfagraph/ng_som_util.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index a0829451d..676fb523e 100644 --- a/src/nfagraph/ng_som_util.cpp +++ b/src/nfagraph/ng_som_util.cpp @@ -186,8 +186,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, return cache.smgb[u]; } - DEBUG_PRINTF("checking if som can go backwards on %u\n", - g[u].index); + DEBUG_PRINTF("checking if som can go backwards on %u\n", g[u].index); set be; BackEdges> backEdgeVisitor(be); @@ -224,6 +223,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, NGHolder c_g; cloneHolder(c_g, g, &orig_to_copy); + /* treat virtual starts as unconditional - wire to startDs instead */ for (NFAVertex v : vertices_range(g)) { if (!is_virtual_start(v, g)) { continue; @@ -236,6 +236,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, clear_vertex(c_v, c_g); } + /* treat u as the only accept state */ NFAVertex c_u = orig_to_copy[u]; clear_in_edges(c_g.acceptEod, c_g); add_edge(c_g.accept, c_g.acceptEod, c_g); @@ -256,7 +257,9 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, } for (auto v : adjacent_vertices_range(t, g)) { if (contains(u_succ, v)) { - add_edge(orig_to_copy[t], c_g.accept, c_g); + /* due to virtual starts being aliased with normal starts in the + * copy of the graph, we may have already added the edges. */ + add_edge_if_not_present(orig_to_copy[t], c_g.accept, c_g); break; } } From 5e0d10d8056b753f982f2e37f51bc67bb1aaf6e1 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 2 Dec 2015 14:23:02 +1100 Subject: [PATCH 002/218] Allow lag on castle infixes to be reduced Reducing lag allows for castles to be merged more effectively --- CMakeLists.txt | 2 + src/rose/rose_build_castle.cpp | 399 ++++++++++++++++++++++++++++++++ src/rose/rose_build_castle.h | 69 ++++++ src/rose/rose_build_compile.cpp | 288 +---------------------- src/rose/rose_build_convert.cpp | 6 +- src/rose/rose_build_impl.h | 3 + src/rose/rose_build_misc.cpp | 17 ++ 7 files changed, 495 insertions(+), 289 deletions(-) create mode 100644 src/rose/rose_build_castle.cpp create mode 100644 src/rose/rose_build_castle.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b4d81754e..6fbc006fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -778,6 +778,8 @@ SET (hs_SRCS src/rose/rose_build_anchored.cpp src/rose/rose_build_anchored.h src/rose/rose_build_bytecode.cpp + src/rose/rose_build_castle.h + src/rose/rose_build_castle.cpp src/rose/rose_build_compile.cpp src/rose/rose_build_convert.cpp src/rose/rose_build_convert.h diff --git a/src/rose/rose_build_castle.cpp b/src/rose/rose_build_castle.cpp new file mode 100644 index 000000000..83c69e708 --- /dev/null +++ b/src/rose/rose_build_castle.cpp @@ -0,0 +1,399 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_castle.h" + +#include "rose_build_impl.h" +#include "ue2common.h" +#include "nfa/castlecompile.h" +#include "nfagraph/ng_holder.h" +#include "nfagraph/ng_puff.h" +#include "util/charreach.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph_range.h" +#include "util/ue2_containers.h" +#include "util/ue2string.h" + +#include +#include +#include +#include + +#include + +using namespace std; +using boost::adaptors::map_values; + +namespace ue2 { + +static +void makeCastle(LeftEngInfo &left, + unordered_map> &cache) { + if (left.dfa || left.haig || left.castle) { + return; + } + if (!left.graph) { + return; + } + + const NGHolder &h = *left.graph; + DEBUG_PRINTF("prefix %p\n", &h); + + if (contains(cache, &h)) { + DEBUG_PRINTF("using cached CastleProto\n"); + left.castle = cache[&h]; + left.graph.reset(); + return; + } + + PureRepeat pr; + if (isPureRepeat(h, pr) && pr.reports.size() == 1) { + DEBUG_PRINTF("vertex preceded by infix repeat %s\n", + pr.bounds.str().c_str()); + left.castle = make_shared(pr); + cache[&h] = left.castle; + left.graph.reset(); + } +} + +static +void makeCastleSuffix(RoseBuildImpl &tbi, RoseVertex v, + ue2::unordered_map > &cache) { + RoseSuffixInfo &suffix = tbi.g[v].suffix; + if (!suffix.graph) { + return; + } + const NGHolder &h = *suffix.graph; + DEBUG_PRINTF("suffix %p\n", &h); + + if (contains(cache, &h)) { + DEBUG_PRINTF("using cached CastleProto\n"); + suffix.castle = cache[&h]; + suffix.graph.reset(); + return; + } + + // The MPV will probably do a better job on the cases it's designed + // for. + const bool fixed_depth = tbi.g[v].min_offset == tbi.g[v].max_offset; + if (isPuffable(h, fixed_depth, tbi.rm, tbi.cc.grey)) { + DEBUG_PRINTF("leaving suffix for puff\n"); + return; + } + + PureRepeat pr; + if (isPureRepeat(h, pr) && pr.reports.size() == 1) { + DEBUG_PRINTF("suffix repeat %s\n", pr.bounds.str().c_str()); + + // Right now, the Castle uses much more stream state to represent a + // {m,1} repeat than just leaving it to an NFA. 
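+        // (A max bound of depth(1) or less is exactly that {m,1} shape, so
+        // return here and leave this suffix to the other engines.)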
+ if (pr.bounds.max <= depth(1)) { + DEBUG_PRINTF("leaving for other engines\n"); + return; + } + + suffix.castle = make_shared(pr); + cache[&h] = suffix.castle; + suffix.graph.reset(); + } +} + +static +vector literals_for_vertex(const RoseBuildImpl &tbi, + RoseVertex v) { + vector rv; + + for (const u32 id : tbi.g[v].literals) { + rv.push_back(tbi.literals.right.at(id)); + } + + return rv; +} + +static +void renovateCastle(RoseBuildImpl &tbi, CastleProto *castle, + const vector &verts) { + DEBUG_PRINTF("looking to renovate\n"); + + if (castle->repeats.size() != 1) { + assert(0); /* should not have merged castles yet */ + return; + } + + PureRepeat &pr = castle->repeats.begin()->second; + if (pr.bounds.max.is_finite()) { + /* repeat cannot be turned into pseudo .* */ + return; + } + + RoseGraph &g = tbi.g; + const CharReach &cr = castle->reach(); + + DEBUG_PRINTF("cr || %zu\n", cr.count()); + + u32 allowed_to_remove = ~0; + size_t min_succ_lit_len = 0; + + for (RoseVertex v : verts) { + assert(g[v].left.castle.get() == castle); + DEBUG_PRINTF("%zu checks at lag %u\n", g[v].idx, g[v].left.lag); + vector lits = literals_for_vertex(tbi, v); + for (const auto &e : lits) { + DEBUG_PRINTF("%s +%u\n", dumpString(e.s).c_str(), e.delay); + if (e.delay) { + return; /* bail - TODO: be less lazy */ + } + + vector rem_local_cr; + u32 ok_count = 0; + for (auto it = e.s.end() - g[v].left.lag; it != e.s.end(); ++it) { + if (!isSubsetOf(*it, cr)) { + break; + } + + ok_count++; + } + LIMIT_TO_AT_MOST(&allowed_to_remove, ok_count); + ENSURE_AT_LEAST(&min_succ_lit_len, e.elength()); + } + } + + DEBUG_PRINTF("possible to decrease lag by %u\n", allowed_to_remove); + + + for (RoseVertex v : verts) { + assert(g[v].left.lag >= allowed_to_remove); + g[v].left.lag -= allowed_to_remove; + } + + assert(castle->repeats.size() == 1); /* should not have merged castles yet */ + + pr.bounds.max += allowed_to_remove; + + /* Although it is always safe to increase the min bound as well, we would + * rather not as a >0 min bound means that we have to store state as well. + * + * As it was legal to run with the original lag, we know that it is not + * possible to have an overlapping match which finishes within the trigger + * literal past the original lag point. However, if there is already a min + * bound constraint this would be broken if we did not also increase the + * min bound. */ + + if (pr.bounds.min > 0ULL || allowed_to_remove > min_succ_lit_len) { + pr.bounds.min += allowed_to_remove; + } +} + +void makeCastles(RoseBuildImpl &tbi) { + if (!tbi.cc.grey.allowCastle && !tbi.cc.grey.allowLbr) { + return; + } + + RoseGraph &g = tbi.g; + + // Caches so that we can reuse analysis on graphs we've seen already. 
+ unordered_map > left_cache; + unordered_map > suffix_cache; + + unordered_map> rev; + + for (RoseVertex v : vertices_range(g)) { + if (g[v].left && !tbi.isRootSuccessor(v)) { + makeCastle(g[v].left, left_cache); + if (g[v].left.castle) { + rev[g[v].left.castle.get()].push_back(v); + } + } + + if (g[v].suffix) { + makeCastleSuffix(tbi, v, suffix_cache); + } + } + + for (const auto &e : rev) { + renovateCastle(tbi, e.first, e.second); + } +} + +bool unmakeCastles(RoseBuildImpl &tbi) { + RoseGraph &g = tbi.g; + + const size_t MAX_UNMAKE_VERTICES = 64; + + map > left_castles; + map > suffix_castles; + bool changed = false; + + for (auto v : vertices_range(g)) { + const LeftEngInfo &left = g[v].left; + if (left.castle && left.castle->repeats.size() > 1) { + left_castles[left].push_back(v); + } + const RoseSuffixInfo &suffix = g[v].suffix; + if (suffix.castle && suffix.castle->repeats.size() > 1) { + suffix_castles[suffix].push_back(v); + } + } + + for (const auto &e : left_castles) { + assert(e.first.castle()); + shared_ptr h = makeHolder(*e.first.castle(), NFA_INFIX, + tbi.cc); + if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { + continue; + } + DEBUG_PRINTF("replace rose with holder (%zu vertices)\n", + num_vertices(*h)); + for (auto v : e.second) { + assert(g[v].left.castle.get() == e.first.castle()); + g[v].left.graph = h; + g[v].left.castle.reset(); + changed = true; + } + } + + for (const auto &e : suffix_castles) { + assert(e.first.castle()); + shared_ptr h = makeHolder(*e.first.castle(), NFA_SUFFIX, + tbi.cc); + if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { + continue; + } + DEBUG_PRINTF("replace suffix with holder (%zu vertices)\n", + num_vertices(*h)); + for (auto v : e.second) { + assert(g[v].suffix.castle.get() == e.first.castle()); + g[v].suffix.graph = h; + g[v].suffix.castle.reset(); + changed = true; + } + } + + return changed; +} + +void remapCastleTops(RoseBuildImpl &tbi) { + ue2::unordered_map > rose_castles; + ue2::unordered_map > suffix_castles; + + RoseGraph &g = tbi.g; + for (auto v : vertices_range(g)) { + if (g[v].left.castle) { + rose_castles[g[v].left.castle.get()].push_back(v); + } + if (g[v].suffix.castle) { + suffix_castles[g[v].suffix.castle.get()].push_back(v); + } + } + + DEBUG_PRINTF("%zu rose castles, %zu suffix castles\n", rose_castles.size(), + suffix_castles.size()); + + map top_map; + + // Remap Rose Castles. + for (const auto &rc : rose_castles) { + CastleProto *c = rc.first; + const vector &verts = rc.second; + + DEBUG_PRINTF("rose castle %p (%zu repeats) has %zu verts\n", c, + c->repeats.size(), verts.size()); + + top_map.clear(); + remapCastleTops(*c, top_map); + + // Update the tops on the edges leading into vertices in v. + for (auto v : verts) { + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_map.at(g[e].rose_top); + } + } + } + + // Remap Suffix Castles. + for (const auto &e : suffix_castles) { + CastleProto *c = e.first; + const vector &verts = e.second; + + DEBUG_PRINTF("suffix castle %p (%zu repeats) has %zu verts\n", c, + c->repeats.size(), verts.size()); + + top_map.clear(); + remapCastleTops(*c, top_map); + + // Update the tops on the suffixes. 
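+        // (Unlike the rose castles above, where tops sit on the in-edges,
+        // a suffix stores its top directly on the vertex.)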
+ for (auto v : verts) { + assert(g[v].suffix); + g[v].suffix.top = top_map.at(g[v].suffix.top); + } + } +} + +bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left, + const set &all_lits, + const RoseEdge &e) { + assert(left.castle()); + const CastleProto &c = *left.castle(); + + const depth max_width = findMaxWidth(c); + DEBUG_PRINTF("castle max width is %s\n", max_width.str().c_str()); + + /* check each pred literal to see if they all kill previous castle + * state */ + for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { + const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id); + const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); + const CharReach &cr = c.reach(); + + DEBUG_PRINTF("s=%s, castle reach=%s\n", dumpString(s).c_str(), + describeClass(cr).c_str()); + + for (const auto &s_cr : s) { + if (!overlaps(cr, s_cr)) { + DEBUG_PRINTF("reach %s kills castle\n", + describeClass(s_cr).c_str()); + goto next_pred; + } + } + + if (max_width < depth(s.length())) { + DEBUG_PRINTF("literal width >= castle max width\n"); + goto next_pred; + } + + return false; + + next_pred:; + } + + return true; +} + +} // namespace ue2 diff --git a/src/rose/rose_build_castle.h b/src/rose/rose_build_castle.h new file mode 100644 index 000000000..4a2b6188b --- /dev/null +++ b/src/rose/rose_build_castle.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_CASTLE_H +#define ROSE_BUILD_CASTLE_H + +#include "rose_graph.h" + +#include + +namespace ue2 { + +class RoseBuildImpl; +struct left_id; +struct ue2_literal; + +/** + * Runs over all rose infix/suffix engines and converts those that are pure + * repeats with one report into CastleProto engines. + */ +void makeCastles(RoseBuildImpl &tbi); + +/** + * Identifies all the CastleProto prototypes that are small enough that they + * would be better implemented as NFAs, and converts them back to NGHolder + * prototypes. + * + * Returns true if any changes were made. 
+ */ +bool unmakeCastles(RoseBuildImpl &tbi); + +/** + * Runs over all the Castle engine prototypes in the graph and ensures that + * they have tops in a contiguous range, ready for construction. + */ +void remapCastleTops(RoseBuildImpl &tbi); + +bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left, + const std::set &all_lits, + const RoseEdge &e); + +} + +#endif diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index a2bd971e2..444ccdd94 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -31,13 +31,13 @@ #include "grey.h" #include "hs_internal.h" #include "rose_build_anchored.h" +#include "rose_build_castle.h" #include "rose_build_convert.h" #include "rose_build_dump.h" #include "rose_build_merge.h" #include "rose_build_role_aliasing.h" #include "rose_build_util.h" #include "ue2common.h" -#include "nfa/castlecompile.h" #include "nfa/nfa_internal.h" #include "nfa/rdfa.h" #include "nfagraph/ng_holder.h" @@ -46,7 +46,6 @@ #include "nfagraph/ng_is_equal.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_mcclellan.h" -#include "nfagraph/ng_puff.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" #include "nfagraph/ng_stop.h" @@ -1606,24 +1605,6 @@ map> findLeftSucc(RoseBuildImpl &tbi) { return leftfixes; } -static -ue2_literal findNonOverlappingTail(const set &lits, - const ue2_literal &s) { - size_t max_overlap = 0; - - for (const auto &lit : lits) { - size_t overlap = lit != s ? maxStringOverlap(lit, s) - : maxStringSelfOverlap(s); - max_overlap = max(max_overlap, overlap); - } - - /* find the tail that doesn't overlap */ - ue2_literal tail = s.substr(max_overlap); - DEBUG_PRINTF("%zu overlap, tail: '%s'\n", max_overlap, - dumpString(tail).c_str()); - return tail; -} - static bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, const set &all_lits, @@ -1657,47 +1638,6 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, return true; } -static -bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left, - const set &all_lits, - const RoseEdge &e) { - assert(left.castle()); - const CastleProto &c = *left.castle(); - - const depth max_width = findMaxWidth(c); - DEBUG_PRINTF("castle max width is %s\n", max_width.str().c_str()); - - /* check each pred literal to see if they all kill previous castle - * state */ - for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { - const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id); - const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); - const CharReach &cr = c.reach(); - - DEBUG_PRINTF("s=%s, castle reach=%s\n", dumpString(s).c_str(), - describeClass(cr).c_str()); - - for (const auto &s_cr : s) { - if (!overlaps(cr, s_cr)) { - DEBUG_PRINTF("reach %s kills castle\n", - describeClass(s_cr).c_str()); - goto next_pred; - } - } - - if (max_width < depth(s.length())) { - DEBUG_PRINTF("literal width >= castle max width\n"); - goto next_pred; - } - - return false; - - next_pred:; - } - - return true; -} - static bool triggerKillsRose(const RoseBuildImpl &tbi, const left_id &left, const set &all_lits, const RoseEdge &e) { @@ -2231,232 +2171,6 @@ void addAnchoredSmallBlockLiterals(RoseBuildImpl &tbi) { } } -static -void makeCastle(LeftEngInfo &left, - ue2::unordered_map > &cache) { - if (left.dfa || left.haig || left.castle) { - return; - } - if (!left.graph) { - return; - } - - const NGHolder &h = *left.graph; - DEBUG_PRINTF("prefix %p\n", &h); - - if (contains(cache, &h)) { - 
DEBUG_PRINTF("using cached CastleProto\n"); - left.castle = cache[&h]; - left.graph.reset(); - return; - } - - PureRepeat pr; - if (isPureRepeat(h, pr) && pr.reports.size() == 1) { - DEBUG_PRINTF("vertex preceded by infix repeat %s\n", - pr.bounds.str().c_str()); - left.castle = make_shared(pr); - cache[&h] = left.castle; - left.graph.reset(); - } -} - -static -void makeCastleSuffix(RoseBuildImpl &tbi, RoseVertex v, - ue2::unordered_map > &cache) { - RoseSuffixInfo &suffix = tbi.g[v].suffix; - if (!suffix.graph) { - return; - } - const NGHolder &h = *suffix.graph; - DEBUG_PRINTF("suffix %p\n", &h); - - if (contains(cache, &h)) { - DEBUG_PRINTF("using cached CastleProto\n"); - suffix.castle = cache[&h]; - suffix.graph.reset(); - return; - } - - // The MPV will probably do a better job on the cases it's designed - // for. - const bool fixed_depth = tbi.g[v].min_offset == tbi.g[v].max_offset; - if (isPuffable(h, fixed_depth, tbi.rm, tbi.cc.grey)) { - DEBUG_PRINTF("leaving suffix for puff\n"); - return; - } - - PureRepeat pr; - if (isPureRepeat(h, pr) && pr.reports.size() == 1) { - DEBUG_PRINTF("suffix repeat %s\n", pr.bounds.str().c_str()); - - // Right now, the Castle uses much more stream state to represent a - // {m,1} repeat than just leaving it to an NFA. - if (pr.bounds.max <= depth(1)) { - DEBUG_PRINTF("leaving for other engines\n"); - return; - } - - suffix.castle = make_shared(pr); - cache[&h] = suffix.castle; - suffix.graph.reset(); - } -} - -/** - * Runs over all rose infix/suffix engines and converts those that are pure - * repeats with one report into CastleProto engines. - */ -static -void makeCastles(RoseBuildImpl &tbi) { - if (!tbi.cc.grey.allowCastle && !tbi.cc.grey.allowLbr) { - return; - } - - RoseGraph &g = tbi.g; - - // Caches so that we can reuse analysis on graphs we've seen already. - ue2::unordered_map > left_cache; - ue2::unordered_map > suffix_cache; - - for (auto v : vertices_range(g)) { - if (g[v].left && !tbi.isRootSuccessor(v)) { - makeCastle(g[v].left, left_cache); - } - - if (g[v].suffix) { - makeCastleSuffix(tbi, v, suffix_cache); - } - } -} - -/** - * Identifies all the CastleProto prototypes that are small enough that they - * would be better implemented as NFAs, and converts them back to NGHolder - * prototypes. - * - * Returns true if any changes were made. 
- */ -static -bool unmakeCastles(RoseBuildImpl &tbi) { - RoseGraph &g = tbi.g; - - const size_t MAX_UNMAKE_VERTICES = 64; - - map > left_castles; - map > suffix_castles; - bool changed = false; - - for (auto v : vertices_range(g)) { - const LeftEngInfo &left = g[v].left; - if (left.castle && left.castle->repeats.size() > 1) { - left_castles[left].push_back(v); - } - const RoseSuffixInfo &suffix = g[v].suffix; - if (suffix.castle && suffix.castle->repeats.size() > 1) { - suffix_castles[suffix].push_back(v); - } - } - - for (const auto &e : left_castles) { - assert(e.first.castle()); - shared_ptr h = makeHolder(*e.first.castle(), NFA_INFIX, - tbi.cc); - if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { - continue; - } - DEBUG_PRINTF("replace rose with holder (%zu vertices)\n", - num_vertices(*h)); - for (auto v : e.second) { - assert(g[v].left.castle.get() == e.first.castle()); - g[v].left.graph = h; - g[v].left.castle.reset(); - changed = true; - } - } - - for (const auto &e : suffix_castles) { - assert(e.first.castle()); - shared_ptr h = makeHolder(*e.first.castle(), NFA_SUFFIX, - tbi.cc); - if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { - continue; - } - DEBUG_PRINTF("replace suffix with holder (%zu vertices)\n", - num_vertices(*h)); - for (auto v : e.second) { - assert(g[v].suffix.castle.get() == e.first.castle()); - g[v].suffix.graph = h; - g[v].suffix.castle.reset(); - changed = true; - } - } - - return changed; -} - -/** - * Runs over all the Castle engine prototypes in the graph and ensures that - * they have tops in a contiguous range, ready for construction. - */ -static -void remapCastleTops(RoseBuildImpl &tbi) { - ue2::unordered_map > rose_castles; - ue2::unordered_map > suffix_castles; - - RoseGraph &g = tbi.g; - for (auto v : vertices_range(g)) { - if (g[v].left.castle) { - rose_castles[g[v].left.castle.get()].push_back(v); - } - if (g[v].suffix.castle) { - suffix_castles[g[v].suffix.castle.get()].push_back(v); - } - } - - DEBUG_PRINTF("%zu rose castles, %zu suffix castles\n", rose_castles.size(), - suffix_castles.size()); - - map top_map; - - // Remap Rose Castles. - for (const auto &rc : rose_castles) { - CastleProto *c = rc.first; - const vector &verts = rc.second; - - DEBUG_PRINTF("rose castle %p (%zu repeats) has %zu verts\n", c, - c->repeats.size(), verts.size()); - - top_map.clear(); - remapCastleTops(*c, top_map); - - // Update the tops on the edges leading into vertices in v. - for (auto v : verts) { - for (const auto &e : in_edges_range(v, g)) { - g[e].rose_top = top_map.at(g[e].rose_top); - } - } - } - - // Remap Suffix Castles. - for (const auto &e : suffix_castles) { - CastleProto *c = e.first; - const vector &verts = e.second; - - DEBUG_PRINTF("suffix castle %p (%zu repeats) has %zu verts\n", c, - c->repeats.size(), verts.size()); - - top_map.clear(); - remapCastleTops(*c, top_map); - - // Update the tops on the suffixes. 
- for (auto v : verts) { - assert(g[v].suffix); - g[v].suffix.top = top_map.at(g[v].suffix.top); - } - } -} - #ifndef NDEBUG static bool historiesAreValid(const RoseGraph &g) { diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index 8dccf0476..e55478013 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -1115,12 +1115,14 @@ void convertAnchPrefixToBounds(RoseBuildImpl &tbi) { const PureRepeat &pr = castle.repeats.begin()->second; DEBUG_PRINTF("castle has repeat %s\n", pr.bounds.str().c_str()); + DEBUG_PRINTF("delay adj %u\n", (u32)delay_adj); DepthMinMax bounds(pr.bounds); // copy if (delay_adj > bounds.min) { - delay_adj = bounds.min; + bounds.min = 0; + } else { + bounds.min -= delay_adj; } - bounds.min -= delay_adj; bounds.max -= delay_adj; g[e].minBound = bounds.min; diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 3112d6398..d4282d1ec 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -544,6 +544,9 @@ bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v); size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); +ue2_literal findNonOverlappingTail(const std::set &lits, + const ue2_literal &s); + void setReportId(NGHolder &g, ReportID id); #ifndef NDEBUG diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index cc5bbc70c..61e3d8747 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -320,6 +320,23 @@ const rose_literal_id &getOverlapLiteral(const RoseBuildImpl &tbi, return tbi.literals.right.at(literal_id); } +ue2_literal findNonOverlappingTail(const set &lits, + const ue2_literal &s) { + size_t max_overlap = 0; + + for (const auto &lit : lits) { + size_t overlap = lit != s ? 
maxStringOverlap(lit, s) + : maxStringSelfOverlap(s); + max_overlap = max(max_overlap, overlap); + } + + /* find the tail that doesn't overlap */ + ue2_literal tail = s.substr(max_overlap); + DEBUG_PRINTF("%zu overlap, tail: '%s'\n", max_overlap, + dumpString(tail).c_str()); + return tail; +} + size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const { size_t overlap = 0; for (auto u_lit_id : g[u].literals) { From 05beadf52fc12b8d7553e9d92817d4704ac256cb Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 2 Dec 2015 14:41:57 +1100 Subject: [PATCH 003/218] Introduce REPEAT_ALWAYS model for {0,} castle repeats As Castle guards the repeats, no more state is needed for these repeats --- src/nfa/castlecompile.cpp | 2 +- src/nfa/lbr.c | 3 +++ src/nfa/repeat.c | 19 ++++++++++++++++++- src/nfa/repeat.h | 9 +++++++++ src/nfa/repeat_internal.h | 28 +++++++++++++++++----------- src/nfa/repeatcompile.cpp | 16 ++++++++++++++-- src/nfa/repeatcompile.h | 3 ++- unit/internal/repeat.cpp | 11 +++++++++-- 8 files changed, 73 insertions(+), 18 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index e5cc92675..2b0f61413 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -316,7 +316,7 @@ void buildSubcastles(const CastleProto &proto, vector &subs, bool is_reset = repeatInfoPair[i].second; enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max, - min_period, is_reset); + min_period, is_reset, true); RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period); DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i, diff --git a/src/nfa/lbr.c b/src/nfa/lbr.c index de93d4a34..0d69cc2ab 100644 --- a/src/nfa/lbr.c +++ b/src/nfa/lbr.c @@ -130,6 +130,9 @@ char repeatIsDead(const struct RepeatInfo *info, return lstate->ctrl.ring.offset == REPEAT_DEAD; case REPEAT_TRAILER: return lstate->ctrl.trailer.offset == REPEAT_DEAD; + case REPEAT_ALWAYS: + assert(!"REPEAT_ALWAYS should only be used by Castle"); + return 0; } assert(0); diff --git a/src/nfa/repeat.c b/src/nfa/repeat.c index c1ff51622..d12bc5a1d 100644 --- a/src/nfa/repeat.c +++ b/src/nfa/repeat.c @@ -922,6 +922,11 @@ void repeatPackOffset(char *dest, const struct RepeatInfo *info, const union RepeatControl *ctrl, u64a offset) { const struct RepeatOffsetControl *xs = &ctrl->offset; DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon); + if (!info->packedCtrlSize) { + assert(info->type == REPEAT_ALWAYS); + DEBUG_PRINTF("externally guarded .*\n"); + return; + } storePackedRelative(dest, xs->offset, offset, info->horizon, info->packedCtrlSize); } @@ -1040,6 +1045,9 @@ void repeatPack(char *dest, const struct RepeatInfo *info, case REPEAT_TRAILER: repeatPackTrailer(dest, info, ctrl, offset); break; + case REPEAT_ALWAYS: + /* nothing to do - no state */ + break; } } @@ -1072,7 +1080,13 @@ static void repeatUnpackOffset(const char *src, const struct RepeatInfo *info, u64a offset, union RepeatControl *ctrl) { struct RepeatOffsetControl *xs = &ctrl->offset; - xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize); + if (!info->packedCtrlSize) { + assert(info->type == REPEAT_ALWAYS); + DEBUG_PRINTF("externally guarded .*\n"); + xs->offset = 0; + } else { + xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize); + } DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset, info->horizon); } @@ -1149,6 +1163,9 @@ void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset, case REPEAT_TRAILER: repeatUnpackTrailer(src, 
info, offset, ctrl); break; + case REPEAT_ALWAYS: + /* nothing to do - no state */ + break; } } diff --git a/src/nfa/repeat.h b/src/nfa/repeat.h index 37374d029..d4f84ea0a 100644 --- a/src/nfa/repeat.h +++ b/src/nfa/repeat.h @@ -135,6 +135,8 @@ u64a repeatLastTop(const struct RepeatInfo *info, return repeatLastTopSparseOptimalP(info, ctrl, state); case REPEAT_TRAILER: return repeatLastTopTrailer(info, ctrl); + case REPEAT_ALWAYS: + return 0; } DEBUG_PRINTF("bad repeat type %u\n", info->type); @@ -200,6 +202,8 @@ u64a repeatNextMatch(const struct RepeatInfo *info, return repeatNextMatchSparseOptimalP(info, ctrl, state, offset); case REPEAT_TRAILER: return repeatNextMatchTrailer(info, ctrl, offset); + case REPEAT_ALWAYS: + return offset + 1; } DEBUG_PRINTF("bad repeat type %u\n", info->type); @@ -275,6 +279,9 @@ void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl, case REPEAT_TRAILER: repeatStoreTrailer(info, ctrl, offset, is_alive); break; + case REPEAT_ALWAYS: + /* nothing to do - no state */ + break; } } @@ -348,6 +355,8 @@ enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info, return repeatHasMatchSparseOptimalP(info, ctrl, state, offset); case REPEAT_TRAILER: return repeatHasMatchTrailer(info, ctrl, offset); + case REPEAT_ALWAYS: + return REPEAT_MATCH; } assert(0); diff --git a/src/nfa/repeat_internal.h b/src/nfa/repeat_internal.h index bf479d1f8..9e3f455c8 100644 --- a/src/nfa/repeat_internal.h +++ b/src/nfa/repeat_internal.h @@ -47,26 +47,26 @@ enum RepeatType { /** General mechanism for tracking {N,M} repeats. Stores the first top as * an absolute offset, then subsequent tops in the {N,M} range as a ring of * relative top indices stored in a multibit. */ - REPEAT_RING = 0, + REPEAT_RING, /** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure, * since only the first top encountered needs to be stored. */ - REPEAT_FIRST = 1, + REPEAT_FIRST, /** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we * store the most recent top encountered. */ - REPEAT_LAST = 2, + REPEAT_LAST, /** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases * where there is a large difference between N and M, and developed to * reduce the state requirements of this case (relative to the RING model). * Uses a small ordered array of top indices relative to \ref * RepeatRangeControl::offset. */ - REPEAT_RANGE = 3, + REPEAT_RANGE, /** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref * RepeatBitmapControl structure at runtime. */ - REPEAT_BITMAP = 4, + REPEAT_BITMAP, /** Optimal mechanism for tracking {N,M} repeats when there is a bound on * how frequently they can be retriggered. @@ -78,13 +78,17 @@ enum RepeatType { * referencing a table that stores values from f(0, min) to f(repeat, min) * eg: repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9. * We search the optimal patch size between min and repeat in advance and - * use the scheme above to do encoding and decoding to reduce stream state size - * */ - REPEAT_SPARSE_OPTIMAL_P = 5, + * use the scheme above to do encoding and decoding to reduce stream state + * size. */ + REPEAT_SPARSE_OPTIMAL_P, - /** Used for {N,M} repeats where 0 < N < 64. Uses the \ref RepeatTrailerControl - * structure at runtime. */ - REPEAT_TRAILER = 6, + /** Used for {N,M} repeats where 0 < N < 64. Uses the + * \ref RepeatTrailerControl structure at runtime. */ + REPEAT_TRAILER, + + /** Degenerate repeat that always returns true. Used by castle for pseudo + * [^X]* repeats. 
*/ + REPEAT_ALWAYS, }; /** @@ -204,6 +208,8 @@ const char *repeatTypeName(u8 type) { return "SPARSE_OPTIMAL_P"; case REPEAT_TRAILER: return "TRAILER"; + case REPEAT_ALWAYS: + return "ALWAYS"; } assert(0); return "UNKNOWN"; diff --git a/src/nfa/repeatcompile.cpp b/src/nfa/repeatcompile.cpp index 2f1875036..2e1010bba 100644 --- a/src/nfa/repeatcompile.cpp +++ b/src/nfa/repeatcompile.cpp @@ -206,6 +206,13 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, packedFieldSizes[1] = repeatMin; packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U; break; + case REPEAT_ALWAYS: + assert(repeatMin == 0ULL); + assert(repeatMax.is_infinite()); + stateSize = 0; // everything is in the control block. + horizon = 0; + packedCtrlSize = 0; + break; } DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize, packedCtrlSize, horizon); @@ -232,9 +239,14 @@ u32 streamStateSize(enum RepeatType type, const depth &repeatMin, } enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, - u32 minPeriod, bool is_reset) { + u32 minPeriod, bool is_reset, + bool has_external_guard) { if (repeatMax.is_infinite()) { - return REPEAT_FIRST; + if (has_external_guard && !repeatMin) { + return REPEAT_ALWAYS; + } else { + return REPEAT_FIRST; + } } if (repeatMin == depth(0) || is_reset) { diff --git a/src/nfa/repeatcompile.h b/src/nfa/repeatcompile.h index 2800ccdb2..fe9a71062 100644 --- a/src/nfa/repeatcompile.h +++ b/src/nfa/repeatcompile.h @@ -68,7 +68,8 @@ struct RepeatStateInfo { * type. */ enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, - u32 minPeriod, bool is_reset); + u32 minPeriod, bool is_reset, + bool has_external_guard = false); u32 calcPackedBytes(u64a val); diff --git a/unit/internal/repeat.cpp b/unit/internal/repeat.cpp index 94f1bdc10..7f245e62f 100644 --- a/unit/internal/repeat.cpp +++ b/unit/internal/repeat.cpp @@ -193,7 +193,9 @@ static const RepeatTestInfo repeatTests[] = { { REPEAT_FIRST, 100, depth::infinity() }, { REPEAT_FIRST, 1000, depth::infinity() }, { REPEAT_FIRST, 3000, depth::infinity() }, - { REPEAT_FIRST, 10000, depth::infinity() } + { REPEAT_FIRST, 10000, depth::infinity() }, + // {,} repeats -- always + { REPEAT_ALWAYS, 0, depth::infinity() }, }; INSTANTIATE_TEST_CASE_P(Repeat, RepeatTest, ValuesIn(repeatTests)); @@ -289,6 +291,10 @@ TEST_P(RepeatTest, FillRing) { TEST_P(RepeatTest, FindTops) { SCOPED_TRACE(testing::Message() << "Repeat: " << info); + /* REPEAT_ALWAYS has no state and so does not track top locations */ + if (info.type == REPEAT_ALWAYS) { + return; + } repeatStore(&info, ctrl, state, 1000, 0); ASSERT_EQ(1000, repeatLastTop(&info, ctrl, state)); @@ -364,7 +370,8 @@ TEST_P(RepeatTest, TwoTops) { SCOPED_TRACE(testing::Message() << "Repeat: " << info); // Only appropriate for tests that store more than one top. - if (info.type == REPEAT_FIRST || info.type == REPEAT_LAST) { + if (info.type == REPEAT_FIRST || info.type == REPEAT_LAST + || info.type == REPEAT_ALWAYS) { return; } From b9c5d65f0e880bf2b1644bb6fc030ec059a9023e Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 2 Dec 2015 15:15:02 +1100 Subject: [PATCH 004/218] Rework literal overlap checks for merging engines Also increase the size of chunks we consider merging for castles. 
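For reference, the reworked test (checkPrefix() and
stringsCanFinishAtSameSpot() in the diff below) asks: given trigger
literals u with lag ulag and v with lag vlag, can v arrive so soon after
u that the engine would have to be inspected at an earlier offset than u
already required? A minimal standalone sketch of that condition follows;
the helper names are illustrative only, and plain character equality
stands in for the real CharReach overlap test:

    #include <cassert>
    #include <cstddef>
    #include <string>

    // Can u and v (with v's last 'shift' bytes cut off) end at the same
    // spot? The real code tests CharReach overlap at each position.
    static bool canFinishAtSameSpot(const std::string &u,
                                    const std::string &v,
                                    std::size_t shift) {
        std::size_t ui = u.size();
        std::size_t vi = shift < v.size() ? v.size() - shift : 0;
        while (ui > 0 && vi > 0) {
            if (u[--ui] != v[--vi]) {
                return false;
            }
        }
        return true;
    }

    // Merging is safe only if v cannot finish within (vlag - ulag)
    // bytes of the end of u.
    static bool checkPrefixSafe(const std::string &u, std::size_t ulag,
                                const std::string &v, std::size_t vlag) {
        if (ulag >= vlag) {
            return true;
        }
        for (std::size_t delta = 0; delta < vlag - ulag; delta++) {
            if (canFinishAtSameSpot(u, v, delta)) {
                return false; // v may need the engine earlier than u did
            }
        }
        return true;
    }

    int main() {
        // "xabc" at lag 2 can end exactly where "abc" at lag 0 ends,
        // forcing an engine check two bytes earlier than before: unsafe.
        assert(!checkPrefixSafe("abc", 0, "xabc", 2));
        assert(checkPrefixSafe("abc", 2, "xabc", 2));
        return 0;
    }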
--- src/rose/rose_build_merge.cpp | 382 ++++++++++++++++++++++++---------- 1 file changed, 269 insertions(+), 113 deletions(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 4c55a41f4..e42e0aca6 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -93,6 +93,7 @@ static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64; static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32; static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10; static const size_t MERGE_GROUP_SIZE_MAX = 200; +static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000; /** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */ static const size_t DFA_CHUNK_SIZE_MAX = 200; @@ -799,47 +800,69 @@ static void chunkBouquets(const Bouquet &in, } } +static +bool stringsCanFinishAtSameSpot(const ue2_literal &u, + ue2_literal::const_iterator v_b, + ue2_literal::const_iterator v_e) { + ue2_literal::const_iterator u_e = u.end(); + ue2_literal::const_iterator u_b = u.begin(); + + while (u_e != u_b && v_e != v_b) { + --u_e; + --v_e; + + if (!overlaps(*u_e, *v_e)) { + return false; + } + } + + return true; +} + /** - * Prefix analysis: For lit1 with delay1 and lit2 with delay2, let L be the - * length of the largest suffix of lit1 that is a prefix of lit2. A merge is - * bad if L - delay1 > len(lit2) - delay2. + * Check that if after u has been seen, that it is impossible for the arrival of + * v to require the inspection of an engine earlier than u did. + * + * Let delta be the earliest that v can be seen after u (may be zero) * - * OR if we would have to check 2 literals of differing lags at the same - * point. + * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta) + * ==> - ulag <= delta - vlag + * ==> vlag - ulag <= delta */ static bool checkPrefix(const rose_literal_id &ul, const u32 ulag, const rose_literal_id &vl, const u32 vlag) { - DEBUG_PRINTF("%s %s\n", escapeString(ul.s).c_str(), - escapeString(vl.s).c_str()); - if (ulag != vlag && (vl.delay || ul.delay || isSuffix(ul.s, vl.s))) { - /* rose literals should not be delayed anyway */ + DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag, + escapeString(vl.s).c_str(), vlag); + + if (vl.delay || ul.delay) { + /* engine related literals should not be delayed anyway */ return false; } - // Note that maxOverlap also picks up infixes. - size_t overlap = maxOverlap(ul, vl); - if (overlap < ulag) { - return true; /* avoiding underflow */ + if (ulag >= vlag) { + assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag); + return true; } - return overlap - ulag <= vl.elength() - vlag; -} - -bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, - RoseVertex v) { - assert(u != v); - const auto &ulits = tbi.g[u].literals; - const auto &vlits = tbi.g[v].literals; + size_t min_allowed_delta = vlag - ulag; + DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta); - // We cannot merge roses that prefix literals in different tables. 
- if (tbi.literals.right.at(*ulits.begin()).table != - tbi.literals.right.at(*vlits.begin()).table) { - DEBUG_PRINTF("literals in different tables\n"); - return false; + for (size_t i = 0; i < min_allowed_delta; i++) { + if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { + DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); + return false; + } } - const left_id u_left(tbi.g[u].left), v_left(tbi.g[v].left); + DEBUG_PRINTF("OK\n"); + return true; +} + +static +bool hasSameEngineType(const RoseVertexProps &u_prop, + const RoseVertexProps &v_prop) { + const left_id u_left(u_prop.left), v_left(v_prop.left); if (u_left.haig() || v_left.haig()) { if (u_left.graph() != v_left.graph()) { @@ -859,11 +882,68 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, } } + return true; +} + +static +bool compatibleLiteralsForMerge( + const vector> &ulits, + const vector> &vlits) { + assert(!ulits.empty()); + assert(!vlits.empty()); + + // We cannot merge engines that prefix literals in different tables. + if (ulits[0].first->table != vlits[0].first->table) { + DEBUG_PRINTF("literals in different tables\n"); + return false; + } + + /* An engine requires that all accesses to it are ordered by offsets. (ie, + we can not check an engine's state at offset Y, if we have already + checked its status at offset X and X > Y). If we can not establish that + the literals used for triggering will statisfy this property, then it is + not safe to merge the engine. */ + for (const auto &ue : ulits) { + const rose_literal_id &ul = *ue.first; + u32 ulag = ue.second; + + if (ul.delay) { + return false; // We don't handle delayed cases yet. + } + + for (const auto &ve : vlits) { + const rose_literal_id &vl = *ve.first; + u32 vlag = ve.second; + + if (vl.delay) { + return false; // We don't handle delayed cases yet. + } + + if (!checkPrefix(ul, ulag, vl, vlag) + || !checkPrefix(vl, vlag, ul, ulag)) { + DEBUG_PRINTF("prefix check failed\n"); + return false; + } + } + } + + return true; +} + +bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, + RoseVertex v) { + assert(u != v); + + if (!hasSameEngineType(tbi.g[u], tbi.g[v])) { + return false; + } + // UE-1675: in block mode, we want to be a little more selective -- only // merge prefix roses when the literal sets are the same. if (!tbi.cc.streaming && tbi.isRootSuccessor(u)) { assert(tbi.isRootSuccessor(v)); - if (ulits != vlits) { + + if (tbi.g[u].literals != tbi.g[v].literals) { DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); return false; } @@ -883,88 +963,104 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, } } - // We accept any pair of literal sets A and B where no literal in A - // contains a literal in B and no literal in B contains a literal in A. - - const u32 ulag = tbi.g[u].left.lag; - const u32 vlag = tbi.g[v].left.lag; - - for (const u32 &ulit : ulits) { - const rose_literal_id &ul = tbi.literals.right.at(ulit); - - if (ul.delay) { - return false; // We don't handle delayed cases here. - } - - for (const u32 &vlit : vlits) { - const rose_literal_id &vl = tbi.literals.right.at(vlit); + u32 ulag = tbi.g[u].left.lag; + vector> ulits; + ulits.reserve(tbi.g[u].literals.size()); + for (u32 id : tbi.g[u].literals) { + ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag)); + } - if (vl.delay) { - return false; // We don't handle delayed cases here. 
- } + u32 vlag = tbi.g[v].left.lag; + vector> vlits; + vlits.reserve(tbi.g[v].literals.size()); + for (u32 id : tbi.g[v].literals) { + vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag)); + } - if (!checkPrefix(ul, ulag, vl, vlag) || - !checkPrefix(vl, vlag, ul, ulag)) { - DEBUG_PRINTF("prefix check failed\n"); - return false; - } - } + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; } - DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", - tbi.g[u].idx, tbi.g[v].idx); + DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].idx, + tbi.g[v].idx); return true; } +/* We cannot merge an engine, if a trigger literal and a post literal overlap + * in such a way that engine status needs to be check at a point before the + * engine's current location. + * + * i.e., for a trigger literal u and a pos literal v, + * where delta is the earliest v can appear after t, + * we require that v_loc - v_lag >= u_loc + * ==> u_loc + delta - v_lag >= u_loc + * ==> delta >= v_lag + * + */ static -bool mergeableDelays(const RoseBuildImpl &tbi, const flat_set &ulits, - const flat_set &vlits, u32 vlag) { - for (const u32 &ulit : ulits) { - const rose_literal_id &ul = tbi.literals.right.at(ulit); - assert(!ul.delay); // this should never have got this far? - for (const u32 vlit : vlits) { - const rose_literal_id &vl = tbi.literals.right.at(vlit); - assert(!vl.delay); // this should never have got this far? - - DEBUG_PRINTF("%s %s (lag %u, overlap %zu)\n", - escapeString(ul.s).c_str(), - escapeString(vl.s).c_str(), vlag, - maxOverlap(ul, vl)); - size_t l = vl.elength() - maxOverlap(ul, vl); - if (vlag > l) { - DEBUG_PRINTF("failed lag check!\n"); - return false; - } +bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, + u32 vlag) { + DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(), + escapeString(vl.s).c_str(), vlag); + + for (size_t i = 0; i < vlag; i++) { + if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { + DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); + return false; } } + + DEBUG_PRINTF("OK\n"); return true; } -static +static never_inline bool checkPredDelays(const RoseBuildImpl &tbi, const deque &v1, const deque &v2) { - set preds; + flat_set preds; for (auto v : v1) { insert(&preds, inv_adjacent_vertices(v, tbi.g)); } + flat_set pred_lits; + + /* No need to examine delays of a common pred - as it must already have + * survived the delay checks. + * + * This is important when the pred is in the anchored table as + * the literal is no longer available. */ + flat_set known_good_preds; + for (auto v : v2) { + insert(&known_good_preds, inv_adjacent_vertices(v, tbi.g)); + } + for (auto u : preds) { - const auto &pred_lits = tbi.g[u].literals; - for (auto v : v2) { - u32 vlag = tbi.g[v].left.lag; - DEBUG_PRINTF("consider (%zu, %zu) lag=%u\n", tbi.g[u].idx, - tbi.g[v].idx, vlag); - if (edge_by_target(u, v, tbi.g).second) { - /* no need to examine delays as it is a common pred - so checks - * must already have survived the delay checks. - * This is important when the pred is in the anchored table as - * the literal is no longer available. 
*/ - DEBUG_PRINTF("ok, also %zu is also a pred of %zu\n", - tbi.g[u].idx, tbi.g[v].idx); - continue; - } - if (!mergeableDelays(tbi, pred_lits, tbi.g[v].literals, vlag)) { - return false; + if (!contains(known_good_preds, &u)) { + insert(&pred_lits, tbi.g[u].literals); + } + } + + vector pred_rose_lits; + pred_rose_lits.reserve(pred_lits.size()); + for (const auto &p : pred_lits) { + pred_rose_lits.push_back(&tbi.literals.right.at(p)); + } + + for (auto v : v2) { + u32 vlag = tbi.g[v].left.lag; + if (!vlag) { + continue; + } + + for (const u32 vlit : tbi.g[v].literals) { + const rose_literal_id &vl = tbi.literals.right.at(vlit); + assert(!vl.delay); // this should never have got this far? + for (const auto &ul : pred_rose_lits) { + assert(!ul->delay); // this should never have got this far? + + if (!checkPredDelay(*ul, vl, vlag)) { + return false; + } } } } @@ -976,17 +1072,79 @@ static bool mergeableRoseVertices(const RoseBuildImpl &tbi, const deque &verts1, const deque &verts2) { - for (auto v1 : verts1) { - for (auto v2 : verts2) { - if (!mergeableRoseVertices(tbi, v1, v2)) { + assert(!verts1.empty()); + assert(!verts2.empty()); + + RoseVertex u_front = verts1.front(); + RoseVertex v_front = verts2.front(); + + /* all vertices must have the same engine type: assume all verts in each + * group are already of the same type */ + if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) { + return false; + } + + bool is_prefix = tbi.isRootSuccessor(u_front); + + /* We cannot merge prefixes/vertices if they are successors of different + * root vertices: similarly, assume the grouped vertices are compatible */ + if (is_prefix) { + assert(tbi.isRootSuccessor(v_front)); + set u_preds; + set v_preds; + insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g)); + insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g)); + + if (u_preds != v_preds) { + return false; + } + } + + vector> ulits; /* lit + lag pairs */ + for (auto a : verts1) { + // UE-1675: in block mode, we want to be a little more selective -- + // only merge prefix roses when the literal sets are the same. + if (!tbi.cc.streaming && is_prefix) { + assert(tbi.isRootSuccessor(a)); + + if (tbi.g[u_front].literals != tbi.g[a].literals) { + DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); return false; } } + + u32 ulag = tbi.g[a].left.lag; + for (u32 id : tbi.g[a].literals) { + ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag)); + } + } + + vector> vlits; + for (auto a : verts2) { + // UE-1675: in block mode, we want to be a little more selective -- + // only merge prefix roses when the literal sets are the same. + if (!tbi.cc.streaming && is_prefix) { + assert(tbi.isRootSuccessor(a)); + + if (tbi.g[u_front].literals != tbi.g[a].literals) { + DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); + return false; + } + } + + u32 vlag = tbi.g[a].left.lag; + for (u32 id : tbi.g[a].literals) { + vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag)); + } + } + + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; } // Check preds are compatible as well. 
- if (!checkPredDelays(tbi, verts1, verts2) || - !checkPredDelays(tbi, verts2, verts1)) { + if (!checkPredDelays(tbi, verts1, verts2) + || !checkPredDelays(tbi, verts2, verts1)) { return false; } @@ -1741,33 +1899,31 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) { } static -void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) { +void mergeCastleChunk(RoseBuildImpl &tbi, RoseBouquet &cands) { + /* caller must have already ensured that candidates have the same reach */ RoseGraph &g = tbi.g; - DEBUG_PRINTF("%zu castle rose merge candidates\n", roses.size()); + DEBUG_PRINTF("%zu castle rose merge candidates\n", cands.size()); deque merged; - for (auto it = roses.begin(); it != roses.end(); ++it) { + for (auto it = cands.begin(); it != cands.end(); ++it) { left_id r1 = *it; CastleProto &castle1 = *r1.castle(); - const deque &verts1 = roses.vertices(r1); + const deque &verts1 = cands.vertices(r1); merged.clear(); - for (auto jt = next(it); jt != roses.end(); ++jt) { + for (auto jt = next(it); jt != cands.end(); ++jt) { left_id r2 = *jt; CastleProto &castle2 = *r2.castle(); - const deque &verts2 = roses.vertices(r2); + const deque &verts2 = cands.vertices(r2); if (castle1.repeats.size() == castle1.max_occupancy) { DEBUG_PRINTF("castle1 has hit max occupancy\n"); break; // next castle1 } - if (castle1.reach() != castle2.reach()) { - DEBUG_PRINTF("different reach\n"); - continue; // next castle2 - } + assert(castle1.reach() == castle2.reach()); if (!mergeableRoseVertices(tbi, verts1, verts2)) { DEBUG_PRINTF("not mergeable\n"); @@ -1793,12 +1949,12 @@ void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) { } } - roses.insert(r1, verts2); + cands.insert(r1, verts2); merged.push_back(r2); } DEBUG_PRINTF("%zu roses merged\n", merged.size()); - roses.erase_all(merged.begin(), merged.end()); + cands.erase_all(merged.begin(), merged.end()); } } @@ -1924,13 +2080,13 @@ void mergeCastleLeftfixes(RoseBuildImpl &tbi) { for (auto &m : by_reach) { DEBUG_PRINTF("%zu castles for reach: %s\n", m.second.size(), describeClass(m.first).c_str()); - RoseBouquet &roses = m.second; - deque rose_groups; - chunkBouquets(roses, rose_groups, MERGE_GROUP_SIZE_MAX); - roses.clear(); + RoseBouquet &candidates = m.second; + deque cand_groups; + chunkBouquets(candidates, cand_groups, MERGE_CASTLE_GROUP_SIZE_MAX); + candidates.clear(); - for (auto &group : rose_groups) { - mergeCastleRoses(tbi, group); + for (auto &group : cand_groups) { + mergeCastleChunk(tbi, group); } } } From e065c4d60bb1ff774660a8c4444e30b6f261837f Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 2 Dec 2015 15:49:49 +1100 Subject: [PATCH 005/218] make nfaExecCastle0_QR() more efficent 1. Reverse scan for the last escape and only process later events. 2. 
Only scheck subcastles which may expire for staleness --- src/grey.cpp | 2 +- src/nfa/castle.c | 280 +++++++++++++++++++++++++------------- src/nfa/castle_dump.cpp | 1 + src/nfa/castle_internal.h | 4 + src/nfa/castlecompile.cpp | 40 +++++- src/nfa/nfa_api.h | 3 +- 6 files changed, 225 insertions(+), 105 deletions(-) diff --git a/src/grey.cpp b/src/grey.cpp index 3f1699764..d08724150 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -54,7 +54,7 @@ Grey::Grey(void) : allowRose(true), allowExtendedNFA(true), /* bounded repeats of course */ allowLimExNFA(true), - allowSidecar(true), + allowSidecar(false), allowAnchoredAcyclic(true), allowSmallLiteralSet(true), allowCastle(true), diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 66e0ded68..274e5705a 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -162,6 +162,10 @@ static really_inline char castleInAccept(const struct Castle *c, struct mq *q, const ReportID report, const u64a offset) { DEBUG_PRINTF("offset=%llu\n", offset); + /* ignore when just catching up due to full queue */ + if (report == MO_INVALID_IDX) { + return 0; + } if (c->exclusive) { const u32 activeIdx = partial_load_u32(q->streamState, @@ -216,6 +220,11 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset, void *full_state, void *stream_state) { DEBUG_PRINTF("offset=%llu\n", offset); + if (!c->staleIterOffset) { + DEBUG_PRINTF("{no repeats can go stale}\n"); + return; /* no subcastle can ever go stale */ + } + if (c->exclusive) { const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize); if (activeIdx < c->numRepeats) { @@ -227,19 +236,27 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset, if (!c->pureExclusive) { const u8 *active = (const u8 *)stream_state + c->activeIdxSize; - for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; - i = mmbit_iterate(active, c->numRepeats, i)) { + const struct mmbit_sparse_iter *it + = (const void *)((const char *)c + c->staleIterOffset); + + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + u32 numRepeats = c->numRepeats; + u32 idx = 0; + + u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state); + while(i != MMB_INVALID) { DEBUG_PRINTF("subcastle %u\n", i); - subCastleDeactivateStaleSubs(c, offset, full_state, - stream_state, i); + subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i); + i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it, + si_state); } } } static really_inline void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, - void *full_state, void *stream_state) { + void *full_state, void *stream_state, + UNUSED char stale_checked) { assert(top < c->numRepeats); const struct SubCastle *sub = getSubCastle(c, top); @@ -263,8 +280,8 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, } else { DEBUG_PRINTF("repeat %u is already alive\n", top); // Caller should ensure we're not stale. - assert(repeatHasMatch(info, rctrl, rstate, offset) != - REPEAT_STALE); + assert(!stale_checked + || repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE); // Ignore duplicate top events. 
u64a last = repeatLastTop(info, rctrl, rstate); @@ -589,7 +606,103 @@ char castleScan(const struct Castle *c, const u8 *buf, const size_t begin, } static really_inline -void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp) { +char castleRevScanVerm(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanNVerm(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rnvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanShufti(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const m128 mask_lo = c->u.shuf.mask_lo; + const m128 mask_hi = c->u.shuf.mask_hi; + const u8 *ptr = rshuftiExec(mask_lo, mask_hi, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanTruffle(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rtruffleExec(c->u.truffle.mask1, c->u.truffle.mask2, + buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + assert(begin <= end); + DEBUG_PRINTF("scanning backwards over (%zu,%zu]\n", begin, end); + if (begin == end) { + return 0; + } + + switch (c->type) { + case CASTLE_DOT: + // Nothing can stop a dot scan! 
+ return 0; + case CASTLE_VERM: + return castleRevScanVerm(c, buf, begin, end, loc); + case CASTLE_NVERM: + return castleRevScanNVerm(c, buf, begin, end, loc); + case CASTLE_SHUFTI: + return castleRevScanShufti(c, buf, begin, end, loc); + case CASTLE_TRUFFLE: + return castleRevScanTruffle(c, buf, begin, end, loc); + default: + DEBUG_PRINTF("unknown scan type!\n"); + assert(0); + return 0; + } +} + +static really_inline +void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp, + char stale_checked) { const u32 event = q->items[q->cur].type; switch (event) { case MQE_TOP: @@ -603,11 +716,23 @@ void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp) { assert(event < MQE_INVALID); u32 top = event - MQE_TOP_FIRST; DEBUG_PRINTF("top %u at offset %llu\n", top, sp); - castleProcessTop(c, top, sp, q->state, q->streamState); + castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked); break; } } +static really_inline +void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) { + DEBUG_PRINTF("clearing active repeats due to escape\n"); + if (c->exclusive) { + partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize); + } + + if (!c->pureExclusive) { + mmbit_clear(active, c->numRepeats); + } +} + static really_inline char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, enum MatchMode mode) { @@ -698,15 +823,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, } if (escape_found) { - DEBUG_PRINTF("clearing active repeats due to escape\n"); - if (c->exclusive) { - partial_store_u32(q->streamState, c->numRepeats, - c->activeIdxSize); - } - - if (!c->pureExclusive) { - mmbit_clear(active, c->numRepeats); - } + clear_repeats(c, q, active); } } @@ -720,7 +837,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, } sp = q_cur_offset(q); - castleHandleEvent(c, q, sp); + castleHandleEvent(c, q, sp, 1); q->cur++; } @@ -745,28 +862,34 @@ char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end) { return nfaExecCastle0_Q_i(n, q, end, STOP_AT_MATCH); } -static really_inline -void castleStreamSilent(const struct Castle *c, u8 *active, const u8 *buf, - size_t length) { - DEBUG_PRINTF("entry\n"); +static +s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { + assert(q_cur_type(q) == MQE_START); + assert(q_last_type(q) == MQE_END); + s64a sp = q_cur_loc(q); + s64a ep = q_last_loc(q); - // This call doesn't produce matches, so we elide the castleMatchLoop call - // entirely and just do escape scans to maintain the repeat. + DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep); - size_t eloc = 0; - char escaped = castleScan(c, buf, 0, length, &eloc); - if (escaped) { - assert(eloc < length); - DEBUG_PRINTF("escape found at %zu, clearing castle\n", eloc); - if (c->exclusive) { - partial_store_u32(active - c->activeIdxSize, - c->numRepeats, c->activeIdxSize); + size_t loc; + + if (ep > 0) { + if (castleRevScan(c, q->buffer, sp > 0 ? 
sp : 0, ep, &loc)) { + return (s64a)loc; } + ep = 0; + } - if (!c->pureExclusive) { - mmbit_clear(active, c->numRepeats); + if (sp < 0) { + s64a hlen = q->hlength; + + if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) { + return (s64a)loc - hlen; } + ep = 0; } + + return sp - 1; /* the repeats are never killed */ } char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) { @@ -780,76 +903,40 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) { assert(q->cur + 1 < q->end); /* require at least two items */ assert(q_cur_type(q) == MQE_START); - u64a sp = q_cur_offset(q); - q->cur++; - DEBUG_PRINTF("sp=%llu\n", sp); const struct Castle *c = getImplNfa(n); u8 *active = (u8 *)q->streamState + c->activeIdxSize; - char found = 0; - while (q->cur < q->end) { - DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), - q_cur_offset(q)); - found = 0; - if (c->exclusive) { - const u32 activeIdx = partial_load_u32(q->streamState, - c->activeIdxSize); - if (activeIdx < c->numRepeats) { - found = 1; - } else if (c->pureExclusive) { - DEBUG_PRINTF("castle is dead\n"); - goto scan_done; - } - } - - if (!found && !mmbit_any(active, c->numRepeats)) { - DEBUG_PRINTF("castle is dead\n"); - goto scan_done; - } - u64a ep = q_cur_offset(q); + u64a end_offset = q_last_loc(q) + q->offset; + s64a last_kill_loc = castleLastKillLoc(c, q); + DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n", + last_kill_loc, q_cur_loc(q), q_last_loc(q)); + assert(last_kill_loc < q_last_loc(q)); - if (sp < q->offset) { - DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); - assert(q->offset - sp <= q->hlength); - u64a local_ep = MIN(q->offset, ep); - const u8 *ptr = q->history + q->hlength + sp - q->offset; - castleStreamSilent(c, active, ptr, local_ep - sp); - sp = local_ep; - } - - found = 0; - if (c->exclusive) { - const u32 activeIdx = partial_load_u32(q->streamState, - c->activeIdxSize); - if (activeIdx < c->numRepeats) { - found = 1; - } else if (c->pureExclusive) { - DEBUG_PRINTF("castle is dead\n"); - goto scan_done; - } - } + if (last_kill_loc != q_cur_loc(q) - 1) { + clear_repeats(c, q, active); + } - if (!found && !mmbit_any(active, c->numRepeats)) { - DEBUG_PRINTF("castle is dead\n"); - goto scan_done; - } + q->cur++; /* skip start event */ - if (sp < ep) { - DEBUG_PRINTF("MAIN BUFFER SCAN\n"); - assert(ep - q->offset <= q->length); - const u8 *ptr = q->buffer + sp - q->offset; - castleStreamSilent(c, active, ptr, ep - sp); - } + /* skip events prior to the repeats being squashed */ + while (q_cur_loc(q) <= last_kill_loc) { + DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q)); + q->cur++; + assert(q->cur < q->end); + } -scan_done: - sp = q_cur_offset(q); - castleDeactivateStaleSubs(c, sp, q->state, q->streamState); - castleHandleEvent(c, q, sp); + while (q->cur < q->end) { + DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), + q_cur_offset(q)); + u64a sp = q_cur_offset(q); + castleHandleEvent(c, q, sp, 0); q->cur++; } - found = 0; + castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState); + + char found = 0; if (c->exclusive) { const u32 activeIdx = partial_load_u32(q->streamState, c->activeIdxSize); @@ -866,7 +953,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) { return 0; } - if (castleInAccept(c, q, report, sp)) { + if (castleInAccept(c, q, report, end_offset)) { return MO_MATCHES_PENDING; } @@ -1013,4 +1100,3 @@ char nfaExecCastle0_expandState(const struct NFA *n, void *dest, } return 0; } - diff 
--git a/src/nfa/castle_dump.cpp b/src/nfa/castle_dump.cpp
index 5f906c2d2..dd0e369f2 100644
--- a/src/nfa/castle_dump.cpp
+++ b/src/nfa/castle_dump.cpp
@@ -100,6 +100,7 @@ void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) {
         fprintf(f, "unknown type %u\n", c->type);
         break;
     }
+    fprintf(f, "Stale Iter Offset: %u\n", c->staleIterOffset);
     fprintf(f, "\n");

     dumpTextReverse(nfa, f);
diff --git a/src/nfa/castle_internal.h b/src/nfa/castle_internal.h
index 7a061d3d4..54578d67c 100644
--- a/src/nfa/castle_internal.h
+++ b/src/nfa/castle_internal.h
@@ -63,6 +63,7 @@ struct SubCastle {
 * - struct Castle
 * - struct SubCastle[numRepeats]
 * - tables for sparse model repeats
+ * - sparse iterator for subcastles that may be stale
 *
 * Castle stores an "active repeats" multibit in stream state, followed by the
 * packed repeat state for each SubCastle. If all SubCastles are mutually
@@ -83,6 +84,9 @@ struct ALIGN_AVX_DIRECTIVE Castle {
     char pureExclusive; //!< tells us if all SubCastles are mutually exclusive
     u8 activeIdxSize;   //!< number of bytes in stream state to store
                         // active SubCastle id for exclusive mode
+    u32 staleIterOffset; //!< offset to a sparse iterator to check for
+                         // stale subs
+
diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp
--- a/src/nfa/castlecompile.cpp
+++ b/src/nfa/castlecompile.cpp
@@
 void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
                      const vector<pair<depth, bool>> &repeatInfoPair,
                      u32 &scratchStateSize, u32 &streamStateSize,
                      u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
-                     const set<u32> &exclusiveGroup) {
+                     const set<u32> &exclusiveGroup, vector<u32> &may_stale) {
@@ -343,6 +344,10 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
             streamStateSize += subStreamStateSize;
         }

+        if (pr.bounds.max.is_finite()) {
+            may_stale.push_back(i);
+        }
+
         info.type = verify_u8(rtype);
         info.repeatMin = depth_to_u32(pr.bounds.min);
         info.repeatMax = depth_to_u32(pr.bounds.max);
@@ -492,11 +497,20 @@ buildCastle(const CastleProto &proto,
     u32 tableSize = 0;
     u32 sparseRepeats = 0;

+    vector<u32> may_stale; /* sub castles that may go stale */
+
     buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
                     scratchStateSize, streamStateSize, tableSize,
-                    tables, sparseRepeats, exclusiveGroup);
+                    tables, sparseRepeats, exclusiveGroup, may_stale);
+
+    DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
+    vector<mmbit_sparse_iter> stale_iter;
+    if (!may_stale.empty()) {
+        mmbBuildSparseIterator(stale_iter, may_stale, numRepeats);
+    }
+
-    const size_t total_size =
+    size_t total_size =
         sizeof(NFA) +                     // initial NFA structure
         sizeof(Castle) +                  // Castle structure
         sizeof(SubCastle) * subs.size() + // SubCastles themselves
@@ -506,6 +520,9 @@ buildCastle(const CastleProto &proto,
         sizeof(u64a) * sparseRepeats;     // paddings for
                                           // REPEAT_SPARSE_OPTIMAL_P tables

+    total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter));
+    total_size += byte_length(stale_iter); // stale sparse iter
+
     aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
     nfa->type = verify_u8(CASTLE_NFA_0);
     nfa->length = verify_u32(total_size);
@@ -515,7 +532,8 @@ buildCastle(const CastleProto &proto,
     nfa->minWidth = verify_u32(minWidth);
     nfa->maxWidth = maxWidth.is_finite() ?
verify_u32(maxWidth) : 0; - char *ptr = (char *)nfa.get() + sizeof(NFA); + char * const base_ptr = (char *)nfa.get() + sizeof(NFA); + char *ptr = base_ptr; Castle *c = (Castle *)ptr; c->numRepeats = verify_u32(subs.size()); c->exclusive = exclusive; @@ -560,6 +578,16 @@ buildCastle(const CastleProto &proto, sub->exclusive = 0; } } + + ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter); + + assert(ptr + byte_length(stale_iter) == base_ptr + total_size - sizeof(NFA)); + if (!stale_iter.empty()) { + c->staleIterOffset = verify_u32(ptr - base_ptr); + copy_bytes(ptr, stale_iter); + ptr += byte_length(stale_iter); + } + return nfa; } @@ -893,7 +921,7 @@ unique_ptr makeHolder(const CastleProto &proto, nfa_kind kind, unique_ptr g = ue2::make_unique(kind); for (const auto &m : proto.repeats) { - if (m.first >= CASTLE_MAX_TOPS) { + if (m.first >= NFA_MAX_TOP_MASKS) { DEBUG_PRINTF("top %u too big for an NFA\n", m.first); return nullptr; } diff --git a/src/nfa/nfa_api.h b/src/nfa/nfa_api.h index 256549902..4e31a6254 100644 --- a/src/nfa/nfa_api.h +++ b/src/nfa/nfa_api.h @@ -189,7 +189,8 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q); * be monotonically increasing. If not all the data was processed during * the call, the queue is updated to reflect the remaining work. * @param report we are interested in, if set at the end of the scan returns - * @ref MO_MATCHES_PENDING + * @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should + * be passed in. * @return @ref MO_ALIVE if the nfa is still active with no matches pending, * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not * alive From a7d8dafb71b1c6e3c59e3035d242bdc6d899e54d Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 26 Nov 2015 12:44:56 +1100 Subject: [PATCH 006/218] detach the sidecar --- CMakeLists.txt | 11 - src/grey.cpp | 2 - src/grey.h | 1 - src/nfagraph/ng_misc_opt.h | 4 +- src/nfagraph/ng_rose.cpp | 34 +- src/rose/block.c | 18 - src/rose/eod.c | 15 - src/rose/init.c | 22 - src/rose/match.c | 72 +--- src/rose/rose_build_add.cpp | 19 +- src/rose/rose_build_bytecode.cpp | 311 +------------ src/rose/rose_build_compile.cpp | 131 +----- src/rose/rose_build_convert.cpp | 35 +- src/rose/rose_build_dump.cpp | 4 - src/rose/rose_build_impl.h | 4 - src/rose/rose_build_misc.cpp | 16 - src/rose/rose_build_role_aliasing.cpp | 11 +- src/rose/rose_dump.cpp | 40 -- src/rose/rose_graph.h | 3 - src/rose/rose_in_graph.h | 6 - src/rose/rose_internal.h | 64 +-- src/rose/rose_sidecar_runtime.h | 101 ----- src/rose/stream.c | 7 - src/scratch.c | 14 +- src/scratch.h | 5 - src/sidecar/sidecar.c | 349 --------------- src/sidecar/sidecar.h | 74 ---- src/sidecar/sidecar_compile.cpp | 600 -------------------------- src/sidecar/sidecar_compile.h | 61 --- src/sidecar/sidecar_dump.cpp | 101 ----- src/sidecar/sidecar_dump.h | 46 -- src/sidecar/sidecar_generic.h | 223 ---------- src/sidecar/sidecar_internal.h | 156 ------- src/sidecar/sidecar_shufti.c | 127 ------ src/sidecar/sidecar_shufti.h | 38 -- unit/CMakeLists.txt | 1 - unit/internal/sidecar.cpp | 312 -------------- 37 files changed, 37 insertions(+), 3001 deletions(-) delete mode 100644 src/rose/rose_sidecar_runtime.h delete mode 100644 src/sidecar/sidecar.c delete mode 100644 src/sidecar/sidecar.h delete mode 100644 src/sidecar/sidecar_compile.cpp delete mode 100644 src/sidecar/sidecar_compile.h delete mode 100644 src/sidecar/sidecar_dump.cpp delete mode 100644 src/sidecar/sidecar_dump.h delete mode 100644 
src/sidecar/sidecar_generic.h delete mode 100644 src/sidecar/sidecar_internal.h delete mode 100644 src/sidecar/sidecar_shufti.c delete mode 100644 src/sidecar/sidecar_shufti.h delete mode 100644 unit/internal/sidecar.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 6fbc006fc..3ff475955 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -451,12 +451,6 @@ set (hs_exec_SRCS src/nfa/vermicelli.h src/nfa/vermicelli_run.h src/nfa/vermicelli_sse.h - src/sidecar/sidecar.c - src/sidecar/sidecar.h - src/sidecar/sidecar_generic.h - src/sidecar/sidecar_internal.h - src/sidecar/sidecar_shufti.c - src/sidecar/sidecar_shufti.h src/som/som.h src/som/som_runtime.h src/som/som_runtime.c @@ -474,7 +468,6 @@ set (hs_exec_SRCS src/rose/match.c src/rose/miracle.h src/rose/runtime.h - src/rose/rose_sidecar_runtime.h src/rose/rose.h src/rose/rose_internal.h src/rose/rose_types.h @@ -762,8 +755,6 @@ SET (hs_SRCS src/parser/unsupported.h src/parser/utf8_validate.h src/parser/utf8_validate.cpp - src/sidecar/sidecar_compile.cpp - src/sidecar/sidecar_compile.h src/smallwrite/smallwrite_build.cpp src/smallwrite/smallwrite_build.h src/smallwrite/smallwrite_internal.h @@ -876,8 +867,6 @@ set(hs_dump_SRCS src/parser/dump.cpp src/parser/dump.h src/parser/position_dump.h - src/sidecar/sidecar_dump.cpp - src/sidecar/sidecar_dump.h src/smallwrite/smallwrite_dump.cpp src/smallwrite/smallwrite_dump.h src/som/slot_manager_dump.cpp diff --git a/src/grey.cpp b/src/grey.cpp index d08724150..69dab627f 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -54,7 +54,6 @@ Grey::Grey(void) : allowRose(true), allowExtendedNFA(true), /* bounded repeats of course */ allowLimExNFA(true), - allowSidecar(false), allowAnchoredAcyclic(true), allowSmallLiteralSet(true), allowCastle(true), @@ -207,7 +206,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowRose); G_UPDATE(allowExtendedNFA); G_UPDATE(allowLimExNFA); - G_UPDATE(allowSidecar); G_UPDATE(allowAnchoredAcyclic); G_UPDATE(allowSmallLiteralSet); G_UPDATE(allowCastle); diff --git a/src/grey.h b/src/grey.h index 10379e1ac..a22610520 100644 --- a/src/grey.h +++ b/src/grey.h @@ -54,7 +54,6 @@ struct Grey { bool allowRose; bool allowExtendedNFA; bool allowLimExNFA; - bool allowSidecar; bool allowAnchoredAcyclic; bool allowSmallLiteralSet; bool allowCastle; diff --git a/src/nfagraph/ng_misc_opt.h b/src/nfagraph/ng_misc_opt.h index c0e24da57..4955c7af0 100644 --- a/src/nfagraph/ng_misc_opt.h +++ b/src/nfagraph/ng_misc_opt.h @@ -60,8 +60,8 @@ struct BoundedRepeatSummary { bool improveGraph(NGHolder &g, som_type som); /** Sometimes the reach of a vertex is greater than it needs to be to reduce - * stop chars for the benefit of the rest of our code base (accel, sidecar, - * etc). In these circumstances, we can treat the reach as the smaller one as + * stop chars for the benefit of the rest of our code base (accel, etc). In + * these circumstances, we can treat the reach as the smaller one as * the graphs are equivalent. 
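 * reduced_cr() computes this smaller, equivalent reach for a vertex.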
*/ CharReach reduced_cr(NFAVertex v, const NGHolder &g, const std::map &br_cyclic); diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index f706e8585..3015af4c5 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -1505,16 +1505,10 @@ bool splitRoseEdge(RoseInGraph &ig, const VertLitInfo &split, } static -bool isStarCliche(const NGHolder &g, const ue2_literal &succ_lit, - const Grey &grey, CharReach *escapes_out) { +bool isStarCliche(const NGHolder &g) { DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g)); bool nonspecials_seen = false; - CharReach escapes; - - // Escapes are only available if we have the Sidecar engine available to - // implement them. - const u32 max_escapes = grey.allowSidecar ? MAX_ESCAPE_CHARS : 0; for (auto v : vertices_range(g)) { if (is_special(v, g)) { @@ -1526,8 +1520,7 @@ bool isStarCliche(const NGHolder &g, const ue2_literal &succ_lit, } nonspecials_seen = true; - escapes = ~g[v].char_reach; - if (escapes.count() > max_escapes) { + if (!g[v].char_reach.all()) { return false; } @@ -1547,14 +1540,6 @@ bool isStarCliche(const NGHolder &g, const ue2_literal &succ_lit, return false; } - /* we need to check that succ lit does not intersect with the escapes. */ - for (const auto &c : succ_lit) { - if ((escapes & c).any()) { - return false; - } - } - - *escapes_out = escapes; return true; } @@ -1620,16 +1605,13 @@ void processInfixes(RoseInGraph &ig, const CompileContext &cc) { if (delay != max_allowed_delay) { restoreTrailingLiteralStates(*h_new, lit2, delay); - delay = removeTrailingLiteralStates(*h_new, lit2, - max_allowed_delay); + delay = removeTrailingLiteralStates(*h_new, lit2, max_allowed_delay); } - CharReach escapes; - if (isStarCliche(*h_new, lit2, cc.grey, &escapes)) { + if (isStarCliche(*h_new)) { DEBUG_PRINTF("is a X star!\n"); ig[e].graph.reset(); ig[e].graph_lag = 0; - ig[e].escapes = escapes; } else { ig[e].graph = move(h_new); ig[e].graph_lag = delay; @@ -2410,7 +2392,6 @@ static void makeNocaseWithPrefixMask(RoseInGraph &g, RoseInVertex v) { for (const auto &e : in_edges_range(v, g)) { const RoseInVertex u = source(e, g); - CharReach &escapes = g[e].escapes; if (!g[e].graph) { g[e].graph = make_shared(whatRoseIsThis(g, e)); @@ -2420,17 +2401,13 @@ void makeNocaseWithPrefixMask(RoseInGraph &g, RoseInVertex v) { assert(!g[e].maxBound || g[e].maxBound == ROSE_BOUND_INF); if (g[u].type == RIV_START) { - assert(escapes.none()); add_edge(h.startDs, h.accept, h); h[h.startDs].reports.insert(0); } else if (g[e].maxBound == ROSE_BOUND_INF) { add_edge(h.start, h.accept, h); NFAVertex ds = add_vertex(h); - // Cyclic vertex which takes over handling the escapes inside - // the prefix graph. 
- h[ds].char_reach = ~escapes; - escapes.clear(); + h[ds].char_reach = CharReach::dot(); add_edge(h.start, ds, h); add_edge(ds, ds, h); @@ -2438,7 +2415,6 @@ void makeNocaseWithPrefixMask(RoseInGraph &g, RoseInVertex v) { h[h.start].reports.insert(0); h[ds].reports.insert(0); } else { - assert(escapes.none()); add_edge(h.start, h.accept, h); h[h.start].reports.insert(0); } diff --git a/src/rose/block.c b/src/rose/block.c index 6f28832ef..ae7d5545c 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -34,7 +34,6 @@ #include "nfa/nfa_rev_api.h" #include "nfa/mcclellan.h" #include "util/fatbit.h" -#include "rose_sidecar_runtime.h" #include "rose.h" #include "rose_common.h" @@ -78,20 +77,6 @@ void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable, } while (1); } -static really_inline -void init_sidecar(const struct RoseEngine *t, struct hs_scratch *scratch) { - if (!t->smatcherOffset) { - return; - } - - DEBUG_PRINTF("welcome to the sidecar\n"); - assert(t->initSideEnableOffset); - // We have to enable some sidecar literals - const char *template = (const char *)t + t->initSideEnableOffset; - - memcpy(&scratch->side_enabled, template, t->stateOffsets.sidecar_size); -} - static really_inline void init_state_for_block(const struct RoseEngine *t, u8 *state) { assert(t); @@ -172,15 +157,12 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, tctxt->next_mpv_offset = 0; tctxt->curr_anchored_loc = MMB_INVALID; tctxt->curr_row_offset = 0; - tctxt->side_curr = 0; scratch->am_log_sum = 0; /* clear the anchored logs */ scratch->al_log_sum = 0; fatbit_clear(scratch->aqa); - init_sidecar(t, scratch); /* Init the sidecar enabled state */ - scratch->catchup_pq.qm_size = 0; init_outfixes_for_block(t, scratch, state, is_small_block); diff --git a/src/rose/eod.c b/src/rose/eod.c index 4b22af396..46605f93f 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -28,7 +28,6 @@ #include "catchup.h" #include "match.h" -#include "rose_sidecar_runtime.h" #include "rose.h" #include "util/fatbit.h" @@ -98,13 +97,6 @@ hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset, DEBUG_PRINTF("eod offset=%llu, eod length=%zu\n", offset, eod_len); struct RoseContext *tctxt = &scratch->tctxt; - - /* update side_curr for eod_len */ - tctxt->side_curr = offset - eod_len; - - /* no need to enable any sidecar groups as they are for .*A.* constructs - * not allowed in the eod table */ - const struct HWLM *etable = getELiteralMatcher(t); hwlmExec(etable, eod_data, eod_len, adj, roseCallback, tctxt, tctxt->groups); @@ -238,9 +230,6 @@ void cleanupAfterEodMatcher(const struct RoseEngine *t, u8 *state, u64a offset, // Flush history to make sure it's consistent. roseFlushLastByteHistory(t, state, offset, tctxt); - - // Catch up the sidecar to cope with matches raised in the etable. - catchup_sidecar(tctxt, offset); } static rose_inline @@ -323,7 +312,6 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, // Unset the reports we just fired so we don't fire them again below. 
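    /* both the role-state multibit and the active-leaf array are cleared, so
     * the EOD-anchored matcher starts from a clean slate */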
mmbit_clear(getRoleState(state), t->rolesWithStateCount); mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount); - sidecar_enabled_populate(t, scratch, state); hwlmcb_rv_t rv = roseEodRunMatcher(t, offset, scratch, is_streaming); if (rv == HWLM_TERMINATE_MATCHING) { @@ -368,9 +356,6 @@ void prepForEod(const struct RoseEngine *t, u8 *state, size_t length, struct RoseContext *tctxt) { roseFlushLastByteHistory(t, state, length, tctxt); tctxt->lastEndOffset = length; - if (t->requiresEodSideCatchup) { - catchup_sidecar(tctxt, length); - } } void roseBlockEodExec(const struct RoseEngine *t, u64a offset, diff --git a/src/rose/init.c b/src/rose/init.c index e87210b7a..c2eccd400 100644 --- a/src/rose/init.c +++ b/src/rose/init.c @@ -37,8 +37,6 @@ #include "nfa/mcclellan.h" #include "nfa/nfa_api_util.h" #include "nfa/nfa_internal.h" -#include "sidecar/sidecar.h" -#include "sidecar/sidecar_internal.h" #include "util/multibit.h" #include @@ -55,21 +53,6 @@ void init_rstate(const struct RoseEngine *t, u8 *state) { rstate->broken = NOT_BROKEN; } -static really_inline -void init_sidecar(const struct RoseEngine *t, u8 *state) { - assert(getSLiteralMatcher(t)); - - struct sidecar_enabled *enabled_state - = (struct sidecar_enabled *)(state + t->stateOffsets.sidecar); - - DEBUG_PRINTF("welcome to the sidecar\n"); - assert(t->initSideEnableOffset); - // We have to enable some sidecar literals - const char *template = (const char *)t + t->initSideEnableOffset; - - memcpy(enabled_state, template, t->stateOffsets.sidecar_size); -} - static really_inline void init_outfixes(const struct RoseEngine *t, u8 *state) { /* The active leaf array has been init'ed by the scatter with outfix @@ -105,11 +88,6 @@ void roseInitState(const struct RoseEngine *t, u8 *state) { init_rstate(t, state); - // Init the sidecar state - if (t->smatcherOffset) { - init_sidecar(t, state); - } - init_state(t, state); init_outfixes(t, state); diff --git a/src/rose/match.c b/src/rose/match.c index be9bc35ef..d71cbe43e 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -31,7 +31,6 @@ #include "infix.h" #include "match.h" #include "miracle.h" -#include "rose_sidecar_runtime.h" #include "rose.h" #include "som/som_runtime.h" #include "util/bitutils.h" @@ -230,28 +229,6 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, return tctx->groups; } -/* Note: uses the stashed sparse iter state; cannot be called from - * anybody else who is using it - */ -static never_inline -void roseSquashStates(const struct RoseEngine *t, const struct RoseSide *tsb, - struct RoseContext *tctxt) { - DEBUG_PRINTF("attempting to squash states\n"); - - struct mmbit_sparse_state *s = tctxtToScratch(tctxt)->sparse_iter_state; - u8 *state = tctxt->state; - void *role_state = getRoleState(state); - u32 role_count = t->rolesWithStateCount; - const struct mmbit_sparse_iter *it = getByOffset(t, tsb->squashIterOffset); - assert(ISALIGNED(it)); - - /* we can squash willy-nilly */ - DEBUG_PRINTF("squashing iter off = %u\n", tsb->squashIterOffset); - mmbit_sparse_iter_unset(role_state, role_count, it, s); - DEBUG_PRINTF("squashing groups with = %016llx\n", tsb->squashGroupMask); - tctxt->groups &= tsb->squashGroupMask; -} - static really_inline hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, s64a loc, @@ -936,9 +913,6 @@ void roseSetRole(const struct RoseEngine *t, u8 *state, // offset-tracking role. 
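    /* with the sidecar gone, a role that is already set has no further side
     * effects to apply */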
if (alreadySet) { DEBUG_PRINTF("role already set\n"); - if (tr->sidecarEnableOffset) { - enable_sidecar(tctxt, tr); - } return; } @@ -947,11 +921,6 @@ void roseSetRole(const struct RoseEngine *t, u8 *state, // Switch on this role's groups tctxt->groups |= tr->groups; - - if (tr->sidecarEnableOffset) { - // We have to enable some sidecar literals - enable_sidecar(tctxt, tr); - } } static rose_inline @@ -1551,20 +1520,6 @@ char roseWalkRootRoles(const struct RoseEngine *t, } } -void roseSidecarCallback(UNUSED u64a offset, u32 side_id, void *context) { - struct RoseContext *tctxt = context; - const struct RoseEngine *t = tctxt->t; - - DEBUG_PRINTF("SIDE MATCH side_id=%u offset=[%llu, %llu]\n", side_id, - offset, offset + 1); - assert(side_id < t->sideCount); - - const struct RoseSide *side = &getSideEntryTable(t)[side_id]; - roseSquashStates(t, side, tctxt); - - DEBUG_PRINTF("done with sc\n"); -} - /* handles catchup, som, cb, etc */ static really_inline hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, u8 *state, @@ -1674,15 +1629,6 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { tctxt->lastEndOffset = real_end; } - if (tl->requires_side - && real_end <= t->floatingMinLiteralMatchOffset) { - /* Catch up the sidecar to the literal location. This means that all - * squashing events are delivered before any 'side involved' literal - * matches at a given location. */ - - catchup_sidecar(tctxt, real_end); - } - /* anchored literals are root only */ if (!roseWalkRootRoles(t, tl, real_end, tctxt, 1, 0)) { rv = HWLM_TERMINATE_MATCHING; @@ -1762,18 +1708,6 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, return HWLM_CONTINUE_MATCHING; } - // If the current literal requires sidecar support, run to current - // location. - if (tl->requires_side) { - /* Catch up the sidecar to the literal location. This means that all - * squashing events are delivered before any 'side involved' literal - * matches at a given location. */ - - if (tl->rootRoleCount || tl->minDepth <= tctxt->depth) { - catchup_sidecar(tctxt, end); - } - } - if (tl->minDepth > tctxt->depth) { DEBUG_PRINTF("IGNORE: minDepth=%u > %u\n", tl->minDepth, tctxt->depth); goto root_roles; @@ -1848,7 +1782,7 @@ hwlmcb_rv_t playDelaySlot(struct RoseContext *tctxt, const u8 *delaySlotBase, DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth, tctxt->groups); - /* delayed literals can't safely set groups, squashing may from side. + /* delayed literals can't safely set groups. * However we may be setting groups that successors already have * worked out that we don't need to match the group */ DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups, @@ -1881,8 +1815,8 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(struct RoseContext *tctxt, u32 curr_loc) { DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth, tctxt->groups); - /* anchored literals can't safely set groups, squashing may from - * side. However we may be setting groups that successors already + /* anchored literals can't safely set groups. 
+ * However we may be setting groups that successors already * have worked out that we don't need to match the group */ DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups, tctxt->groups); diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index cbd46df64..7fecaeec6 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -115,7 +115,6 @@ RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset, g[v].idx = build->vertexIndex++; g[v].min_offset = min_offset; g[v].max_offset = max_offset; - /* no escapes */ DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].idx, literalId); @@ -201,19 +200,16 @@ RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) { namespace { struct created_key { explicit created_key(const RoseInEdgeProps &trep) - : prefix(trep.graph.get()), lag(trep.graph_lag), escapes(trep.escapes) { - assert(escapes.none() || !prefix); + : prefix(trep.graph.get()), lag(trep.graph_lag) { } bool operator<(const created_key &b) const { const created_key &a = *this; ORDER_CHECK(prefix); ORDER_CHECK(lag); - ORDER_CHECK(escapes); return false; } NGHolder *prefix; u32 lag; - CharReach escapes; }; } @@ -320,15 +316,6 @@ void createVertices(RoseBuildImpl *tbi, } NFAVertex p = pv.first; - if (isLeafNode(p, g)) { - DEBUG_PRINTF("setting escapes (reach %s) on parent\n", - describeClass(key.escapes, 20, CC_OUT_TEXT).c_str()); - g[p].escapes = key.escapes; - } else if (key.escapes != g[p].escapes) { - DEBUG_PRINTF("creating differently escaped version of parent\n"); - p = duplicate(tbi, p); - g[p].escapes = key.escapes; - } RoseEdge e; bool added; @@ -1106,10 +1093,6 @@ bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) { DEBUG_PRINTF("edge bounds\n"); return true; } - if (ig[e].escapes.any()) { - DEBUG_PRINTF("escapes\n"); - return true; - } RoseInVertex u = source(e, ig); if (ig[u].type == RIV_START) { diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index e17953aad..3b8949e4c 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -57,8 +57,6 @@ #include "nfagraph/ng_stop.h" #include "nfagraph/ng_util.h" #include "nfagraph/ng_width.h" -#include "sidecar/sidecar.h" -#include "sidecar/sidecar_compile.h" #include "som/slot_manager.h" #include "util/alloc.h" #include "util/bitutils.h" @@ -329,21 +327,15 @@ u8 pickRuntimeImpl(const RoseBuildImpl &tbi, u32 outfixEndQueue) { } static -void fillStateOffsets(const RoseBuildImpl &tbi, const sidecar *side, - u32 rolesWithStateCount, u32 anchorStateSize, - u32 activeArrayCount, u32 activeLeftCount, - u32 laggedRoseCount, u32 floatingStreamStateRequired, - u32 historyRequired, RoseStateOffsets *so) { +void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, + u32 anchorStateSize, u32 activeArrayCount, + u32 activeLeftCount, u32 laggedRoseCount, + u32 floatingStreamStateRequired, u32 historyRequired, + RoseStateOffsets *so) { /* runtime state (including role state) first and needs to be u32-aligned */ u32 curr_offset = sizeof(RoseRuntimeState) + mmbit_size(rolesWithStateCount); - so->sidecar = curr_offset; - if (side) { - so->sidecar_size = sidecarEnabledSize(side); - curr_offset += so->sidecar_size; - } - so->activeLeafArray = curr_offset; /* TODO: limit size of array */ curr_offset += mmbit_size(activeArrayCount); @@ -1478,82 +1470,6 @@ u32 RoseBuildImpl::calcHistoryRequired() const { return m ? 
m - 1 : 0; } -static -u32 sizeSideSuccMasks(const sidecar *stable, - const map, set > &side_succ_map) { - if (!stable) { - return 0; - } - - return verify_u32((side_succ_map.size() + 1 /* for init */) - * sidecarEnabledSize(stable)); -} - -static -void populateSideSuccLists(const RoseBuildImpl &tbi, build_context &bc, - const sidecar *stable, RoseEngine *engine, u32 base_offset, - const map, set > &sidecar_succ_map) { - const RoseGraph &g = tbi.g; - - if (!stable) { - return; - } - - u32 enabled_size = sidecarEnabledSize(stable); - char *curr = (char *)engine + base_offset; - - for (const auto &e : sidecar_succ_map) { - u32 offset = verify_u32(curr - (char *)engine); - - memset(curr, 0, enabled_size); - /* populate the list */ - for (u32 side_id : e.first) { - sidecarEnabledAdd(stable, (sidecar_enabled *)curr, side_id); - } - - curr += enabled_size; - - /* update the role entries */ - for (RoseVertex v : e.second) { - if (v == tbi.root) { - DEBUG_PRINTF("setting root emask\n"); - engine->initSideEnableOffset = offset; - } else { - DEBUG_PRINTF("setting boring emask\n"); - assert(g[v].role < bc.roleTable.size()); - bc.roleTable[g[v].role].sidecarEnableOffset = offset; - } - } - } - - if (!engine->initSideEnableOffset) { - DEBUG_PRINTF("add a blank enabled for root\n"); - engine->initSideEnableOffset = verify_u32(curr - (char *)engine); - memset(curr, 0, enabled_size); - curr += enabled_size; - } -} - -/* Also creates a map of sidecar id set to the roles which enables that set - */ -static -void markSideEnablers(RoseBuildImpl &build, - map, set > *scmap) { - map > enablers; - u32 side_id = 0; - for (const auto &e : build.side_squash_roles) { - for (RoseVertex v : e.second) { - enablers[v].insert(side_id); - } - - side_id++; - } - - for (const auto &e : enablers) { - (*scmap)[e.second].insert(e.first); - } -} - #ifdef DEBUG static UNUSED string dumpMask(const vector &v) { @@ -1873,11 +1789,6 @@ bool isNoRunsVertex(const RoseBuildImpl &tbi, NFAVertex u) { return false; } - if (g[u].escapes.any()) { - DEBUG_PRINTF("u=%zu has escapes\n", g[u].idx); - return false; - } - /* TODO: handle non-root roles as well. It can't be that difficult... */ if (!in_degree_equal_to(u, g, 1)) { @@ -2176,35 +2087,6 @@ aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &tbi, return etable; } -static -aligned_unique_ptr buildSideMatcher(const RoseBuildImpl &tbi, - size_t *ssize) { - *ssize = 0; - - if (tbi.side_squash_roles.empty()) { - DEBUG_PRINTF("no sidecar\n"); - return nullptr; - } - assert(tbi.cc.grey.allowSidecar); - - vector sl; - - /* TODO: ensure useful sidecar entries only */ - for (const CharReach &cr : tbi.side_squash_roles | map_keys) { - sl.push_back(cr); - } - - aligned_unique_ptr stable = sidecarCompile(sl); - if (!stable) { - throw CompileError("Unable to generate bytecode."); - } - - *ssize = sidecarSize(stable.get()); - assert(*ssize); - DEBUG_PRINTF("built sidecar literal table size %zu bytes\n", *ssize); - return stable; -} - // Adds a sparse iterator to the end of the iterator table, returning its // offset. 
static @@ -3040,122 +2922,6 @@ pair buildEodAnchorRoles(RoseBuildImpl &tbi, build_context &bc, return addPredSparseIter(bc, predStates); } -static -void buildSideEntriesAndIters(const RoseBuildImpl &tbi, build_context &bc, - const set &squash_roles, - vector &sideTable) { - const RoseGraph &g = tbi.g; - - sideTable.push_back(RoseSide()); /* index in array gives an implicit id */ - RoseSide &tsb = sideTable.back(); - memset(&tsb, 0, sizeof(tsb)); - - if (squash_roles.empty()) { - return; - } - - set squashed_succ; - - // Build a vector of the roles' state IDs - vector states; - for (RoseVertex v : squash_roles) { - assert(g[v].role < bc.roleTable.size()); - const RoseRole &tr = bc.roleTable[g[v].role]; - DEBUG_PRINTF("considering role %u, state index %u\n", g[v].role, - tr.stateIndex); - assert(tr.stateIndex != MMB_INVALID); - - states.push_back(tr.stateIndex); - DEBUG_PRINTF("side %zu squashes state index %u/role %u\n", - sideTable.size() - 1, tr.stateIndex, g[v].role); - - /* we cannot allow groups to be squashed if the source vertex is in an - * anchored table due to ordering issue mean that a literals cannot - * set groups */ - if (tbi.isAnchored(v) && g[v].max_offset != 1) { - DEBUG_PRINTF("%u has anchored table pred no squashy\n", g[v].role); - continue; - } - - DEBUG_PRINTF("role %u is fine to g squash\n", g[v].role); - - for (auto w : adjacent_vertices_range(v, g)) { - if (in_degree(w, g) == 1) { /* TODO: improve: check that each pred - * is in id's squash role */ - squashed_succ.insert(w); - } - } - } - - // Build sparse iterators and add to table. - assert(!states.empty()); - - vector iter; - mmbBuildSparseIterator(iter, states, bc.numStates); - assert(!iter.empty()); - tsb.squashIterOffset = addIteratorToTable(bc, iter); - - // Build a mask of groups. 
- rose_group squash_groups = 0; - for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) { - if (!contains(tbi.group_to_literal, i)) { - continue; - } - - DEBUG_PRINTF("checking group %u for %zu's squash mask\n", i, - sideTable.size() - 1); - - const set &group_lits = tbi.group_to_literal.find(i)->second; - - /* check for each literal in this group if it is squashed by this - * sidecar escape */ - for (u32 lit : group_lits) { - DEBUG_PRINTF("inspecting lit %u\n", lit); - const rose_literal_info &this_info = tbi.literal_info.at(lit); - - /* check that all roles belonging to this literal are squashed */ - for (RoseVertex v : this_info.vertices) { - DEBUG_PRINTF("checking if role is squashed %u...\n", g[v].role); - if (squashed_succ.find(v) != squashed_succ.end()) { - continue; - } - - DEBUG_PRINTF("...role not taken %u\n", g[v].role); - - /* if the literal is length 1 and anchored (0,0) when can ignore - * it as any matching must have happened before the side lit - * arrived */ - if (g[v].max_offset == 1) { - DEBUG_PRINTF("we can ignore this role as 1st byte only\n"); - continue; - } - - goto fail_group; - } - } - - continue; - - fail_group: - DEBUG_PRINTF("group %u is not squashed\n", i); - /* we need to keep this group active */ - squash_groups |= 1ULL << i; - } - - DEBUG_PRINTF("%zu group squash mask: %016llx\n", sideTable.size() - 1, - squash_groups); - tsb.squashGroupMask = squash_groups; -} - -// Construct sparse iterators for squashes -static -void buildSideTable(const RoseBuildImpl &build, build_context &bc, - vector &sideTable) { - for (const auto &e : build.side_squash_roles) { - buildSideEntriesAndIters(build, bc, e.second, sideTable); - } -} - static void fillLookaroundTables(char *look_base, char *reach_base, const vector &look_vec) { @@ -3194,31 +2960,12 @@ bool hasBoundaryReports(const BoundaryReports &boundary) { return false; } -static -bool needsSidecarCatchup(const RoseBuildImpl &build, u32 id) { - const RoseGraph &g = build.g; - - for (RoseVertex v : build.literal_info.at(id).vertices) { - if (g[v].escapes.any()) { - return true; - } - - for (RoseVertex u : inv_adjacent_vertices_range(v, g)) { - if (g[u].escapes.any()) { - return true; - } - } - } - - return false; -} - static void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, vector &literalTable) { const u32 final_id = verify_u32(literalTable.size()); assert(contains(tbi.final_id_to_literal, final_id)); - const u32 literalId = *tbi.final_id_to_literal.at(final_id).begin(); + const UNUSED u32 literalId = *tbi.final_id_to_literal.at(final_id).begin(); /* all literal ids associated with this final id should result in identical * literal entry */ const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id); @@ -3275,8 +3022,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, } assert(!tbi.literals.right.at(literalId).delay || !tl.delay_mask); - - tl.requires_side = needsSidecarCatchup(tbi, literalId); } // Construct the literal table. 
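// (RoseLiteral entries no longer carry a requires_side flag.)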
@@ -3836,19 +3581,13 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { if (!engine->anchoredDistance) { return; } - - /* could be improved, if we have any side squash stuff and an anchored table - * set the min float distance to 0 */ - if (!build.side_squash_roles.empty()) { - engine->floatingMinDistance = 0; - } } aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { DerivedBoundaryReports dboundary(boundary); // Build literal matchers - size_t asize = 0, fsize = 0, ssize = 0, esize = 0, sbsize = 0; + size_t asize = 0, fsize = 0, esize = 0, sbsize = 0; size_t floatingStreamStateRequired = 0; size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. @@ -3857,7 +3596,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { buildAnchoredAutomataMatcher(*this, &asize); aligned_unique_ptr ftable = buildFloatingMatcher( *this, &fsize, &historyRequired, &floatingStreamStateRequired); - aligned_unique_ptr stable = buildSideMatcher(*this, &ssize); aligned_unique_ptr etable = buildEodAnchoredMatcher(*this, &esize); aligned_unique_ptr sbtable = buildSmallBlockMatcher(*this, &sbsize); @@ -3925,9 +3663,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { tie(eodIterMapOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc, predTable); - vector sideTable; - buildSideTable(*this, bc, sideTable); - vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); @@ -3940,7 +3675,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 amatcherOffset = 0; u32 fmatcherOffset = 0; - u32 smatcherOffset = 0; u32 ematcherOffset = 0; u32 sbmatcherOffset = 0; @@ -3974,12 +3708,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset += (u32)fsize; } - if (stable) { - currOffset = ROUNDUP_CL(currOffset); - smatcherOffset = currOffset; - currOffset += (u32)ssize; - } - if (etable) { currOffset = ROUNDUP_CL(currOffset); ematcherOffset = currOffset; @@ -4002,9 +3730,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 literalLen = sizeof(RoseLiteral) * literalTable.size(); currOffset = literalOffset + literalLen; - u32 sideOffset = ROUNDUP_N(currOffset, alignof(RoseSide)); - currOffset = sideOffset + byte_length(sideTable); - u32 roleOffset = ROUNDUP_N(currOffset, alignof(RoseRole)); u32 roleLen = sizeof(RoseRole) * bc.roleTable.size(); currOffset = roleOffset + roleLen; @@ -4048,13 +3773,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 anchoredReportInverseMapOffset = currOffset; currOffset += arit.size() * sizeof(u32); - /* sidecar may contain sse in silly cases */ - currOffset = ROUNDUP_N(currOffset, 16); - u32 sideSuccListOffset = currOffset; - map, set > sidecar_succ_map; - markSideEnablers(*this, &sidecar_succ_map); - currOffset += sizeSideSuccMasks(stable.get(), sidecar_succ_map); - currOffset = ROUNDUP_N(currOffset, alignof(ReportID)); u32 multidirectOffset = currOffset; currOffset += mdr_reports.size() * sizeof(ReportID); @@ -4089,7 +3807,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { RoseStateOffsets stateOffsets; memset(&stateOffsets, 0, sizeof(stateOffsets)); - fillStateOffsets(*this, stable.get(), bc.numStates, anchorStateSize, + fillStateOffsets(*this, bc.numStates, anchorStateSize, activeArrayCount, activeLeftCount, laggedRoseCount, floatingStreamStateRequired, historyRequired, &stateOffsets); @@ -4125,11 +3843,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { 
assert(fmatcherOffset >= base_nfa_offset); memcpy(ptr + fmatcherOffset, ftable.get(), fsize); } - if (stable) { - assert(smatcherOffset); - assert(smatcherOffset >= base_nfa_offset); - memcpy(ptr + smatcherOffset, stable.get(), ssize); - } if (etable) { assert(ematcherOffset); assert(ematcherOffset >= base_nfa_offset); @@ -4162,9 +3875,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->runtimeImpl = pickRuntimeImpl(*this, outfixEndQueue); engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); - engine->sideOffset = sideOffset; - engine->sideCount = verify_u32(sideTable.size()); - engine->activeArrayCount = activeArrayCount; engine->activeLeftCount = activeLeftCount; engine->queueCount = queue_count; @@ -4209,8 +3919,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->nonbenefits_base_id = nonbenefits_base_id; engine->literalBenefitsOffsets = base_lits_benefits_offset; - populateSideSuccLists(*this, bc, stable.get(), engine.get(), - sideSuccListOffset, sidecar_succ_map); engine->rosePrefixCount = rosePrefixCount; engine->activeLeftIterOffset @@ -4234,7 +3942,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ematcherOffset = ematcherOffset; engine->sbmatcherOffset = sbmatcherOffset; engine->fmatcherOffset = fmatcherOffset; - engine->smatcherOffset = smatcherOffset; engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); @@ -4251,7 +3958,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->hasFloatingDirectReports = floating_direct_report; engine->requiresEodCheck = hasEodAnchors(*this, built_nfas, outfixEndQueue); - engine->requiresEodSideCatchup = hasEodSideLink(); engine->hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); engine->canExhaust = rm.patternSetCanExhaust(); engine->hasSom = hasSom; @@ -4323,7 +4029,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit); copy_bytes(ptr + engine->multidirectOffset, mdr_reports); copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); - copy_bytes(ptr + engine->sideOffset, sideTable); DEBUG_PRINTF("rose done %p\n", engine.get()); return engine; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 444ccdd94..fd507a119 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -485,21 +485,6 @@ size_t trailerDueToSelf(const rose_literal_id &lit) { return trailer; } -/* note: last byte cannot conflict as escapes are processed after other - * lits at same offset */ -static -bool conflictsWithEscape(const rose_literal_id &litv, const CharReach &cr) { - if (cr.none()) { - return false; - } - - if (litv.delay) { - return true; - } - - return contains(litv.s, cr); -} - static RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { const RoseGraph &g = tbi.g; @@ -618,7 +603,6 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { if (g[v].reports.empty() || g[v].eod_accept || // no accept EOD - g[v].escapes.any() || !g[v].isBoring() || !isLeafNode(v, g) || // Must have no out-edges in_degree(v, g) != 1) { // Role must have exactly one in-edge @@ -937,47 +921,6 @@ bool RoseBuildImpl::hasFinalId(u32 id) const { return literal_info.at(id).final_id != MO_INVALID_IDX; } -static -void doSidecarLiterals(RoseBuildImpl &tbi) { - map > 
escapes; - const RoseGraph &g = tbi.g; - - /* find escapes */ - for (auto v : vertices_range(g)) { - const CharReach &cr = g[v].escapes; - if (cr.none()) { - continue; - } - - DEBUG_PRINTF("vertex %zu has %zu escapes\n", g[v].idx, cr.count()); - - // We only have an implementation for these escapes if the Sidecar is - // available for use. - assert(tbi.cc.grey.allowSidecar); - - assert(!isLeafNode(v, g)); - - /* Verify that all the successors are floating */ - for (UNUSED auto w : adjacent_vertices_range(v, g)) { - assert(!tbi.isAnchored(w)); - } - - escapes[cr].insert(v); - } - - if (escapes.size() > 32) { - /* ensure that a most one sparse iterator is triggered per char */ - escapes = make_disjoint(escapes); - } - - /* create the squash/escape sidecar entries for the vertices and associate - * with appropriate roles */ - for (const auto &e : escapes) { - const CharReach &cr = e.first; - insert(&tbi.side_squash_roles[cr], e.second); - } -} - static bool eligibleForAlwaysOnGroup(const RoseBuildImpl &tbi, u32 id) { /* returns true if it or any of its delay versions have root role */ @@ -1310,42 +1253,6 @@ bool coversGroup(const RoseBuildImpl &tbi, const rose_literal_info &lit_info) { return true; } -static -bool escapesAllPreds(const RoseGraph &g, RoseVertex v, const CharReach &cr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if ((~g[u].escapes & cr).any()) { - return false; - } - } - - return true; -} - -static -bool mayNotSeeSubsequentPredsInOrder(const RoseBuildImpl &tbi, RoseVertex v) { - const RoseGraph &g = tbi.g; - - if (in_degree(v, g) == 1) { - /* if the pred can only match once, there are no subsequent preds */ - RoseVertex u = source(*in_edges(v, g).first, g); - if (g[u].max_offset == g[u].min_offset) { - return false; - } - } - - for (auto u : inv_adjacent_vertices_range(v, g)) { - for (u32 lit_id : g[u].literals) { - const rose_literal_id &lit = tbi.literals.right.at(lit_id); - if (lit.table == ROSE_ANCHORED) { - return true; - } - } - - } - - return false; -} - static bool isGroupSquasher(const RoseBuildImpl &tbi, const u32 id /* literal id */, rose_group forbidden_squash_group) { @@ -1402,35 +1309,6 @@ bool isGroupSquasher(const RoseBuildImpl &tbi, const u32 id /* literal id */, return false; } - /* Can only squash cases with escapes if all preds have the same escapes - * and none of the literals overlap with the escape - * - * Additionally, if we may not see one of the preds in time to turn on - * the group again we have problems. 
- * - * ARGHHHH - */ - if (g[v].escapes.any()) { - if (!escapesAllPreds(g, v, g[v].escapes) - || mayNotSeeSubsequentPredsInOrder(tbi, v)) { - return false; - } - - if (g[v].literals.size() == 1) { - if (conflictsWithEscape(tbi.literals.right.at(id), - g[v].escapes)) { - return false; - } - } else { - for (const auto &lit_id : g[v].literals) { - const rose_literal_id &lit = tbi.literals.right.at(lit_id); - if (lit.delay || contains(lit.s, g[v].escapes)) { - return false; - } - } - } - } - // Out-edges must have inf max bound, + no other shenanigans */ for (const auto &e : out_edges_range(v, g)) { if (g[e].maxBound != ROSE_BOUND_INF) { @@ -1453,9 +1331,8 @@ bool isGroupSquasher(const RoseBuildImpl &tbi, const u32 id /* literal id */, for (auto v : lit_info.vertices) { assert(!tbi.isAnyStart(v)); - // Can't squash cases with accepts or escapes - if (!g[v].reports.empty() - || (g[v].escapes.any() && !escapesAllPreds(g, v, g[v].escapes))) { + // Can't squash cases with accepts + if (!g[v].reports.empty()) { return false; } @@ -1908,7 +1785,6 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai, // Clone vertex with the new literal ID. RoseVertex v = add_vertex(g[lit_v], g); g[v].idx = tbi.vertexIndex++; - g[v].escapes.clear(); g[v].literals.clear(); g[v].literals.insert(lit_id); g[v].min_offset = sai.min_bound + sai.literal.length(); @@ -2418,9 +2294,6 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { assignGroupsToRoles(); findGroupSquashers(*this); - // Collect squash literals for the sidecar - doSidecarLiterals(*this); - /* final prep work */ remapCastleTops(*this); allocateFinalLiteralId(*this); diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index e55478013..2ce211bf5 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -123,12 +123,12 @@ static unique_ptr convertLeafToHolder(const RoseGraph &g, const RoseEdge &t_e, const RoseLiteralMap &literals) { - RoseVertex t_u = source(t_e, g); RoseVertex t_v = target(t_e, g); // leaf vertex for demolition. 
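    /* with escapes gone, the replacement holder below is built purely from
     * dot vertices */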
- const CharReach escape_cr(~g[t_u].escapes); u32 minBound = g[t_e].minBound; u32 maxBound = g[t_e].maxBound; + const CharReach dot = CharReach::dot(); + assert(!g[t_v].left); auto out = ue2::make_unique(NFA_SUFFIX); @@ -138,14 +138,14 @@ unique_ptr convertLeafToHolder(const RoseGraph &g, u32 i = 1; NFAVertex last = out->start; for (; i <= minBound; i++) { - NFAVertex v = addHolderVertex(escape_cr, *out); + NFAVertex v = addHolderVertex(dot, *out); add_edge(last, v, *out); last = v; } NFAVertex last_mand = last; if (maxBound != ROSE_BOUND_INF) { for (; i <= maxBound; i++) { - NFAVertex v = addHolderVertex(escape_cr, *out); + NFAVertex v = addHolderVertex(dot, *out); add_edge(last_mand, v, *out); if (last != last_mand) { add_edge(last, v, *out); @@ -156,7 +156,7 @@ unique_ptr convertLeafToHolder(const RoseGraph &g, if (minBound) { add_edge(last_mand, last_mand, *out); } else { - NFAVertex v = addHolderVertex(escape_cr, *out); + NFAVertex v = addHolderVertex(dot, *out); add_edge(last_mand, v, *out); add_edge(v, v, *out); last = v; @@ -277,28 +277,10 @@ bool isUnconvertibleLeaf(const RoseBuildImpl &tbi, const RoseVertex v) { return true; } - /* more arbitrary magic numbers as riskier transform */ if (g[e].maxBound == ROSE_BOUND_INF) { - if (!tbi.cc.grey.roseConvertInfBadLeaves) { - return true; - } - - if (g[e].minBound > 20) { - DEBUG_PRINTF("fail minbound (%u)\n", maxbound); - return true; - } - - if (max_lit_len > 2) { - DEBUG_PRINTF("fail length\n"); - return true; - } - - if (g[u].escapes.none()) { - /* slightly risky as nfa won't die and we don't avoid running the - sidecar */ - DEBUG_PRINTF("fail: .*\n"); - return true; - } + /* slightly risky as nfa won't die */ + DEBUG_PRINTF("fail: .*\n"); + return true; } return false; @@ -386,7 +368,6 @@ void convertBadLeaves(RoseBuildImpl &tbi) { RoseVertex u = source(e, g); assert(!g[u].suffix); - g[u].escapes = CharReach(); g[u].suffix.graph = h; DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].idx, h.get()); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index e36c54a26..d4918e4ff 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -181,10 +181,6 @@ class RoseGraphWriter { } } - if (g[v].escapes.any()) { - os << "\\nescapes="; - describeClass(os, g[v].escapes, 5, CC_OUT_DOT); - } if (ghost.find(v) != ghost.end()) { os << "\\nGHOST"; } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index d4282d1ec..39596d8fd 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -389,8 +389,6 @@ class RoseBuildImpl : public RoseBuild { std::unique_ptr generateDedupeAux() const override; - bool hasEodSideLink() const; - // Find the maximum bound on the edges to this vertex's successors. 
u32 calcSuccMaxBound(RoseVertex u) const; @@ -494,8 +492,6 @@ class RoseBuildImpl : public RoseBuild { u32 group_weak_end; u32 group_end; - std::map > side_squash_roles; - u32 anchored_base_id; u32 nonbenefits_base_id; diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 61e3d8747..8fbef8891 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -210,22 +210,6 @@ bool RoseBuildImpl::hasNoFloatingRoots() const { return true; } -bool RoseBuildImpl::hasEodSideLink(void) const { - for (auto v : vertices_range(g)) { - if (!g[v].eod_accept) { - continue; - } - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (g[u].escapes.any()) { - return true; - } - } - } - - return false; -} - size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const { const auto &lit_ids = g[v].literals; assert(!lit_ids.empty()); diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 88deaa257..57bbc5504 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -365,8 +365,7 @@ bool sameRoleProperties(const RoseBuildImpl &build, RoseVertex a, RoseVertex b) const RoseGraph &g = build.g; const RoseVertexProps &aprops = g[a], &bprops = g[b]; - if (aprops.eod_accept != bprops.eod_accept - || aprops.escapes != bprops.escapes) { + if (aprops.eod_accept != bprops.eod_accept) { return false; } @@ -457,12 +456,6 @@ size_t hashRightRoleProperties(RoseVertex v, const RoseGraph &g) { static void removeVertexFromMaps(RoseVertex v, RoseBuildImpl &build, revRoseMap &rrm) { - // Remove vertex 'a' from literal squash roles. Only sidecar literals can - // squash vertices, so they're the only ones we have to check. - for (auto &roles : build.side_squash_roles | map_values) { - roles.erase(v); - } - if (build.g[v].left) { const left_id left(build.g[v].left); assert(contains(rrm[left], v)); @@ -558,7 +551,6 @@ void mergeVertices(RoseVertex a, RoseVertex b, RoseBuildImpl &tbi, // Merge role properties. assert(g[a].eod_accept == g[b].eod_accept); - assert(g[a].escapes == g[b].escapes); assert(g[a].left == g[b].left); insert(&g[b].reports, g[a].reports); @@ -596,7 +588,6 @@ void mergeVerticesDiamond(RoseVertex a, RoseVertex b, RoseBuildImpl &tbi, // Merge role properties. For a diamond merge, most properties are already // the same (with the notable exception of the literal set). 
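    // The asserts that follow spell out which properties a diamond merge
    // assumes to be identical.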
assert(g[a].eod_accept == g[b].eod_accept); - assert(g[a].escapes == g[b].escapes); assert(g[a].left == g[b].left); assert(g[a].reports == g[b].reports); assert(g[a].suffix == g[b].suffix); diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 4c4f4a79f..b9c0c05be 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -39,9 +39,6 @@ #include "nfa/nfa_build_util.h" #include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" -#include "sidecar/sidecar.h" -#include "sidecar/sidecar_compile.h" -#include "sidecar/sidecar_dump.h" #include "util/multibit_internal.h" #include @@ -106,11 +103,6 @@ const HWLM *getFloatingMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset); } -static -const sidecar *getSidecarMatcher(const RoseEngine *t) { - return (const sidecar *)loadFromByteCodeOffset(t, t->smatcherOffset); -} - static const HWLM *getEodMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset); @@ -582,7 +574,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { const void *atable = getAnchoredMatcher(t); const HWLM *ftable = getFloatingMatcher(t); - const sidecar *stable = getSidecarMatcher(t); const HWLM *etable = getEodMatcher(t); const HWLM *sbtable = getSmallBlockMatcher(t); @@ -634,16 +625,12 @@ void roseDumpText(const RoseEngine *t, FILE *f) { } else { fprintf(f, "\n"); } - fprintf(f, " - sidecar matcher : %u bytes\n", - stable ? sidecarSize(stable) : 0); fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n", etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance); fprintf(f, " - literal table : %zu bytes\n", t->literalCount * sizeof(RoseLiteral)); - fprintf(f, " - side table : %zu bytes\n", - t->sideCount * sizeof(RoseSide)); fprintf(f, " - role table : %zu bytes\n", t->roleCount * sizeof(RoseRole)); fprintf(f, " - pred table : %zu bytes\n", @@ -666,8 +653,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); fprintf(f, " - runtime state : %zu bytes\n", sizeof(RoseRuntimeState)); fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState); - fprintf(f, " - sidecar : %u bytes\n", - stable ? 
sidecarEnabledSize(stable) : 0U); fprintf(f, " - active array : %u bytes\n", mmbit_size(t->activeArrayCount)); fprintf(f, " - active rose : %u bytes\n", @@ -690,8 +675,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { literalsWithDirectReports(t)); fprintf(f, " - that squash group : %u\n", literalsWithProp(t, &RoseLiteral::squashesGroup)); - fprintf(f, " - need side catchup : %u\n", - literalsWithProp(t, &RoseLiteral::requires_side)); fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id); u32 group_weak_end = t->group_weak_end; @@ -763,12 +746,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { hwlmPrintStats(ftable, f); } - if (stable) { - fprintf(f, "\nSidecar literal matcher stats:\n\n"); - fprintf(f, " Side Entries : %u\n", t->sideCount); - sidecarDump(stable, f); - } - if (etable) { fprintf(f, "\nEOD-anchored literal matcher stats:\n\n"); hwlmPrintStats(etable, f); @@ -792,7 +769,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U8(t, hasFloatingDirectReports); DUMP_U8(t, noFloatingRoots); DUMP_U8(t, requiresEodCheck); - DUMP_U8(t, requiresEodSideCatchup); DUMP_U8(t, hasEodEventLiteral); DUMP_U8(t, hasOutfixesInSmallBlock); DUMP_U8(t, runtimeImpl); @@ -816,7 +792,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, amatcherOffset); DUMP_U32(t, ematcherOffset); DUMP_U32(t, fmatcherOffset); - DUMP_U32(t, smatcherOffset); DUMP_U32(t, sbmatcherOffset); DUMP_U32(t, amatcherMinWidth); DUMP_U32(t, fmatcherMinWidth); @@ -827,8 +802,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, intReportCount); DUMP_U32(t, literalOffset); DUMP_U32(t, literalCount); - DUMP_U32(t, sideOffset); - DUMP_U32(t, sideCount); DUMP_U32(t, multidirectOffset); DUMP_U32(t, activeArrayCount); DUMP_U32(t, activeLeftCount); @@ -872,8 +845,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, delayRebuildLength); DUMP_U32(t, stateOffsets.history); DUMP_U32(t, stateOffsets.exhausted); - DUMP_U32(t, stateOffsets.sidecar); - DUMP_U32(t, stateOffsets.sidecar_size); DUMP_U32(t, stateOffsets.activeLeafArray); DUMP_U32(t, stateOffsets.activeLeftArray); DUMP_U32(t, stateOffsets.activeLeftArray_size); @@ -891,7 +862,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, boundary.reportZeroEodOffset); DUMP_U32(t, totalNumLiterals); DUMP_U32(t, asize); - DUMP_U32(t, initSideEnableOffset); DUMP_U32(t, outfixBeginQueue); DUMP_U32(t, outfixEndQueue); DUMP_U32(t, leftfixBeginQueue); @@ -952,7 +922,6 @@ void roseDumpRoleStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(p, leftfixReport); DUMP_U32(p, leftfixLag); DUMP_U32(p, leftfixQueue); - DUMP_U32(p, sidecarEnableOffset); DUMP_U32(p, somAdjust); DUMP_U32(p, lookaroundIndex); DUMP_U32(p, lookaroundCount); @@ -976,7 +945,6 @@ void roseDumpInternals(const RoseEngine *t, const string &base) { const void *atable = getAnchoredMatcher(t); const HWLM *ftable = getFloatingMatcher(t); - const sidecar *stable = getSidecarMatcher(t); const HWLM *etable = getEodMatcher(t); if (atable) { @@ -995,14 +963,6 @@ void roseDumpInternals(const RoseEngine *t, const string &base) { } } - if (stable) { - FILE *f = fopen((base + "/sidecar.raw").c_str(), "w"); - if (f) { - fwrite(stable, 1, sidecarSize(stable), f); - fclose(f); - } - } - if (etable) { FILE *f = fopen((base + "/eod.raw").c_str(), "w"); if (f) { diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h index 1b893a88f..e29fd2dd0 100644 --- a/src/rose/rose_graph.h +++ b/src/rose/rose_graph.h @@ -146,9 +146,6 @@ struct RoseVertexProps { /** 
\brief Bitmask of groups that this role sets. */ rose_group groups = 0; - /** \brief Characters that escape and squash this role. */ - CharReach escapes; - /** \brief Minimum role (end of literal) offset depth in bytes. */ u32 min_offset = ~u32{0}; diff --git a/src/rose/rose_in_graph.h b/src/rose/rose_in_graph.h index 15be6b0f3..2c00a418d 100644 --- a/src/rose/rose_in_graph.h +++ b/src/rose/rose_in_graph.h @@ -168,12 +168,6 @@ struct RoseInEdgeProps { std::shared_ptr haig; u32 graph_lag; - - /** \brief Escape characters, can be used instead of graph. - * - * currently must not intersect with succ literal and must be a literal - - * literal edge, TODO: handle */ - CharReach escapes; }; typedef boost::adjacency_list 1) u8 squashesGroup; /**< literal switches off its group behind it if it sets a * role */ - u8 requires_side; // need to catch up sidecar for this literal u32 delay_mask; /**< bit set indicates that the literal inserts a delayed * match at the given offset */ u32 delayIdsOffset; // offset to array of ids to poke in the delay structure }; -/* properties for sidecar entries, yay */ -struct RoseSide { - u32 squashIterOffset; // offset of the squash sparse iterator, rose relative - rose_group squashGroupMask; // squash literal squash masks -}; - /* Allocation of Rose literal ids * * The rose literal id space is segmented: @@ -130,8 +123,6 @@ struct RoseSide { * | | * | | * ---- - * - * Note: sidecar 'literals' are in a complete separate space */ /* Rose Literal Sources @@ -140,11 +131,10 @@ struct RoseSide { * 1) The floating table * 2) The anchored table * 3) Delayed literals - * 4) Sidecar literal matcher - * 5) suffixes NFAs - * 6) masksv2 (literals with benefits) - * 7) End anchored table - * 8) prefix / infix nfas + * 4) suffixes NFAs + * 5) masksv2 (literals with benefits) + * 6) End anchored table + * 7) prefix / infix nfas * * Care is required to ensure that events appear to come into Rose in order * (or sufficiently ordered for Rose to cope). Generally the progress of the @@ -162,13 +152,6 @@ struct RoseSide { * Delayed literal ordering is handled by delivering any pending delayed * literals before processing any floating match. * - * Sidecar: - * The sidecar matcher is unique in that it does not return match - * location information. Sidecar literals are escapes between two normal - * roles. The sidecar matcher is caught up to the floating matcher - * before any possible predecessor role, any possible successor role, and - * at stream boundaries^3. - * * Suffix: * Suffixes are always pure terminal roles. Prior to raising a match^2, pending * NFA queues are run to the current point (floating or delayed literal) as @@ -319,8 +302,6 @@ struct RoseRole { * leftfix engine status */ u32 leftfixQueue; /**< queue index of the prefix/infix before role */ u32 infixTriggerOffset; /* offset to list of infix roses to trigger */ - u32 sidecarEnableOffset; /**< offset to list of sidecar literals to enable - */ u32 somAdjust; /**< som for the role is offset from end match offset */ u32 lookaroundIndex; /**< index of lookaround offset/reach in table, or @@ -374,12 +355,6 @@ struct RoseStateOffsets { * reports with that ekey should not be delivered to the user. */ u32 exhausted; - /** Sidecar state. */ - u32 sidecar; - - /** Size of sidecar state, in bytes. */ - u32 sidecar_size; - /** Multibit for active suffix/outfix engines. */ u32 activeLeafArray; @@ -460,9 +435,8 @@ struct RoseBoundaryReports { // In memory, we follow this with: // 1a. anchored 'literal' matcher table // 1b. 
floating literal matcher table -// 1c. sidecar 'literal' matcher table -// 1d. eod-anchored literal matcher table -// 1e. small block table +// 1c. eod-anchored literal matcher table +// 1d. small block table // 2. array of RoseLiteral (literalCount entries) // 3. array of RoseRole (roleCount entries) // 4. array of RosePred (predCount entries) @@ -480,8 +454,6 @@ struct RoseEngine { u8 noFloatingRoots; /* only need to run the anchored table if something * matched in the anchored table */ u8 requiresEodCheck; /* stuff happens at eod time */ - u8 requiresEodSideCatchup; /* we need to do a sidecar catchup before eod - * checks */ u8 hasEodEventLiteral; // fires a ROSE_EVENT literal at eod time. u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even in small block scans. */ @@ -513,7 +485,6 @@ struct RoseEngine { u32 amatcherOffset; // offset of the anchored literal matcher (bytes) u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes) u32 fmatcherOffset; // offset of the floating literal matcher (bytes) - u32 smatcherOffset; // offset of the sidecar literal matcher (bytes) u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes) u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern * involved with the anchored table to produce a full @@ -534,9 +505,6 @@ struct RoseEngine { u32 intReportCount; /**< number of internal_report structures */ u32 literalOffset; // offset of RoseLiteral array (bytes) u32 literalCount; // number of RoseLiteral entries [NOT number of literals] - u32 sideOffset; /**< offset of RoseSide array (bytes), indexed by - *sidecar ids */ - u32 sideCount; /**< number of RoseSide entries */ u32 multidirectOffset; /**< offset of multi-direct report list. */ u32 activeArrayCount; //number of nfas tracked in the active array u32 activeLeftCount; //number of nfas tracked in the active rose array @@ -605,7 +573,6 @@ struct RoseEngine { struct RoseBoundaryReports boundary; u32 totalNumLiterals; /* total number of literals including dr */ u32 asize; /* size of the atable */ - u32 initSideEnableOffset; /* sidecar literals enabled initially */ u32 outfixBeginQueue; /* first outfix queue */ u32 outfixEndQueue; /* one past the last outfix queue */ u32 leftfixBeginQueue; /* first prefix/infix queue */ @@ -683,17 +650,6 @@ const struct HWLM *getFLiteralMatcher(const struct RoseEngine *t) { return (const struct HWLM *)lt; } -static really_inline -const void *getSLiteralMatcher(const struct RoseEngine *t) { - if (!t->smatcherOffset) { - return NULL; - } - - const char *st = (const char *)t + t->smatcherOffset; - assert(ISALIGNED_N(st, 8)); - return st; -} - static really_inline const void *getELiteralMatcher(const struct RoseEngine *t) { if (!t->ematcherOffset) { @@ -724,14 +680,6 @@ const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) { return tl; } -static really_inline -const struct RoseSide *getSideEntryTable(const struct RoseEngine *t) { - const struct RoseSide *rs - = (const struct RoseSide *)((const char *)t + t->sideOffset); - assert(ISALIGNED(rs)); - return rs; -} - static really_inline const struct RoseRole *getRoleTable(const struct RoseEngine *t) { const struct RoseRole *r diff --git a/src/rose/rose_sidecar_runtime.h b/src/rose/rose_sidecar_runtime.h deleted file mode 100644 index 92a717fa9..000000000 --- a/src/rose/rose_sidecar_runtime.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions are met:
- *
- *  * Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of Intel Corporation nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_SIDECAR_RUNTIME_H_1F746F6F237176
-#define ROSE_SIDECAR_RUNTIME_H_1F746F6F237176
-
-#include "hwlm/hwlm.h"
-#include "scratch.h"
-#include "sidecar/sidecar.h"
-#include "rose_common.h"
-#include "ue2common.h"
-
-// Callback defined in match.c
-void roseSidecarCallback(u64a offset, u32 side_id, void *context);
-
-static really_inline
-void catchup_sidecar(struct RoseContext *tctxt, u64a end) {
-    DEBUG_PRINTF("catching up the sidecar from %llu to %llu\n",
-                 tctxt->side_curr, end);
-    const struct sidecar *sidecar = getSLiteralMatcher(tctxt->t);
-    struct hs_scratch *scratch = tctxtToScratch(tctxt);
-    struct core_info *ci = &scratch->core_info;
-
-    if (!sidecar || tctxt->side_curr == end) {
-        return;
-    }
-
-    const u8 *start;
-    if (tctxt->side_curr >= ci->buf_offset) {
-        start = ci->buf + tctxt->side_curr - ci->buf_offset;
-        assert(end <= ci->buf_offset + ci->len);
-    } else {
-        /* at eod time we are called running over the history */
-        start = ci->hbuf + tctxt->side_curr - ci->buf_offset + ci->hlen;
-        assert(end <= ci->buf_offset);
-    }
-    size_t len = end - tctxt->side_curr;
-
-    DEBUG_PRINTF("enabled-->%02hhx\n", *(u8 *)&scratch->side_enabled.arb);
-    sidecarExec(sidecar, start, len, &scratch->side_enabled.arb,
-                scratch->side_scratch, tctxt->side_curr, roseSidecarCallback,
-                tctxt);
-    tctxt->side_curr = end;
-
-    DEBUG_PRINTF("finished catching up the sidecar to %llu\n", end);
-}
-
-static rose_inline
-void enable_sidecar(struct RoseContext *tctxt, const struct RoseRole *tr) {
-    assert(tr->sidecarEnableOffset);
-    const struct sidecar *sidecar = getSLiteralMatcher(tctxt->t);
-    assert(sidecar);
-    struct hs_scratch *scratch = tctxtToScratch(tctxt);
-    DEBUG_PRINTF("welcome to the sidecar\n");
-    sidecarEnabledUnion(sidecar, &scratch->side_enabled.arb,
-        (const void *)((const char *)tctxt->t + tr->sidecarEnableOffset));
-}
-
-static really_inline
-void sidecar_enabled_populate(const struct RoseEngine *t,
-                              struct hs_scratch *scratch, const u8 *state) {
-    DEBUG_PRINTF("enabled-->%02hhx\n", *(state + t->stateOffsets.sidecar));
-    memcpy(&scratch->side_enabled, state +
t->stateOffsets.sidecar, - t->stateOffsets.sidecar_size); - DEBUG_PRINTF("enabled-->%02hhx\n", *(u8 *)&scratch->side_enabled.arb); -} - -static really_inline -void sidecar_enabled_preserve(const struct RoseEngine *t, - const struct hs_scratch *scratch, u8 *state) { - memcpy(state + t->stateOffsets.sidecar, &scratch->side_enabled, - t->stateOffsets.sidecar_size); -} - - -#endif /* ROSE_SIDECAR_RUNTIME_H_1F746F6F237176 */ diff --git a/src/rose/stream.c b/src/rose/stream.c index b100eeaef..4096c3564 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -37,7 +37,6 @@ #include "nfa/nfa_api_queue.h" #include "nfa/nfa_internal.h" #include "util/fatbit.h" -#include "rose_sidecar_runtime.h" #include "rose.h" static rose_inline @@ -407,8 +406,6 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, u8 *state, roseCatchUpLeftfixes(t, state, scratch); roseFlushLastByteHistory(t, state, offset + length, tctxt); tctxt->lastEndOffset = offset + length; - catchup_sidecar(tctxt, offset + length); - sidecar_enabled_preserve(t, scratch, state); storeGroups(t, state, tctxt->groups); struct RoseRuntimeState *rstate = getRuntimeState(state); rstate->stored_depth = tctxt->depth; @@ -473,8 +470,6 @@ void roseStreamExec(const struct RoseEngine *t, u8 *state, tctxt->next_mpv_offset = 0; tctxt->curr_anchored_loc = MMB_INVALID; tctxt->curr_row_offset = 0; - tctxt->side_curr = offset; - DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu\n", scratch->core_info.hlen, scratch->core_info.len); @@ -487,8 +482,6 @@ void roseStreamExec(const struct RoseEngine *t, u8 *state, streamInitSufPQ(t, state, scratch); } - sidecar_enabled_populate(t, scratch, state); - u8 delay_rb_status = rstate->flags; u32 alen = t->anchoredDistance > offset ? diff --git a/src/scratch.c b/src/scratch.c index 7afe7ec8c..b0888fdb2 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -42,7 +42,6 @@ #include "database.h" #include "nfa/limex_context.h" // for NFAContext128 etc #include "nfa/nfa_api_queue.h" -#include "sidecar/sidecar.h" #include "rose/rose_internal.h" #include "util/fatbit.h" #include "util/multibit.h" @@ -101,8 +100,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { + som_store_size + som_now_size + som_attempted_size - + som_attempted_store_size - + proto->sideScratchSize + 15; + + som_attempted_store_size + 15; /* the struct plus the allocated stuff plus padding for cacheline * alignment */ @@ -214,10 +212,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { s->som_attempted_set = (struct fatbit *)current; current += som_attempted_size; - current = ROUNDUP_PTR(current, 16); - s->side_scratch = (void *)current; - current += proto->sideScratchSize; - current = ROUNDUP_PTR(current, 64); assert(ISALIGNED_CL(current)); s->fullState = (char *)current; @@ -328,12 +322,6 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { proto->tStateSize = rose->tStateSize; } - const struct sidecar *side = getSLiteralMatcher(rose); - if (side && sidecarScratchSize(side) > proto->sideScratchSize) { - resize = 1; - proto->sideScratchSize = sidecarScratchSize(side); - } - u32 som_store_count = rose->somLocationCount; if (som_store_count > proto->som_store_count) { resize = 1; diff --git a/src/scratch.h b/src/scratch.h index 83e76da3d..1d329bda0 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -38,7 +38,6 @@ #include "ue2common.h" #include "util/multibit_internal.h" -#include "sidecar/sidecar_internal.h" #include "rose/rose_types.h" #ifdef __cplusplus @@ -131,7 
+130,6 @@ struct RoseContext { u32 filledDelayedSlots; u32 curr_anchored_loc; /**< last read/written row */ u32 curr_row_offset; /**< last read/written entry */ - u64a side_curr; /**< current location of the sidecar scan (abs offset) */ u32 curr_qi; /**< currently executing main queue index during * \ref nfaQueueExec */ }; @@ -181,7 +179,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 anchored_literal_count; u32 delay_count; u32 scratchSize; - u32 sideScratchSize; u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE]; u32 roleCount; struct fatbit *handled_roles; /**< mmbit of ROLES (not states) already @@ -196,8 +193,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u64a som_set_now_offset; /**< offset at which som_set_now represents */ u32 som_store_count; struct mmbit_sparse_state sparse_iter_state[MAX_SPARSE_ITER_STATES]; - union sidecar_enabled_any ALIGN_CL_DIRECTIVE side_enabled; - struct sidecar_scratch *side_scratch; }; static really_inline diff --git a/src/sidecar/sidecar.c b/src/sidecar/sidecar.c deleted file mode 100644 index 0abab06d1..000000000 --- a/src/sidecar/sidecar.c +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "sidecar.h" -#include "sidecar_internal.h" -#include "sidecar_shufti.h" -#include "ue2common.h" -#include "nfa/vermicelli.h" -#include "util/bitutils.h" -#include "util/uniform_ops.h" - -static really_inline -u32 findAndClearLSB_8(u8 *v) { - u32 t = *v; - u32 rv = findAndClearLSB_32(&t); - *v = t; - return rv; -} - -static really_inline -u32 findAndClearLSB_128(m128 *v) { - union { - u32 words[sizeof(m128)/sizeof(u32)]; - m128 simd; - } s; - s.simd = *v; - u32 rv = 0; - for (u32 i = 0; i < ARRAY_LENGTH(s.words); i++) { - u32 *w = &s.words[i]; - if (*w) { - rv = findAndClearLSB_32(w) + 32 * i; - break; - } - } - - *v = s.simd; - return rv; -} - -static never_inline -u32 findAndClearLSB_256(m256 *v) { - union { - u32 words[sizeof(m256)/sizeof(u32)]; - m256 simd; - } s; - s.simd = *v; - u32 rv = 0; - for (u32 i = 0; i < ARRAY_LENGTH(s.words); i++) { - u32 *w = &s.words[i]; - if (*w) { - rv = findAndClearLSB_32(w) + 32 * i; - break; - } - } - - *v = s.simd; - return rv; -} - -#define DO_DEAD_CHECK 1 - -#define TAG 8 -#define STATE_T u8 -#include "sidecar_generic.h" - -#define TAG 32 -#define STATE_T u32 -#include "sidecar_generic.h" - -#define TAG 64 -#define STATE_T u64a -#include "sidecar_generic.h" - -#define TAG 128 -#define STATE_T m128 -#include "sidecar_generic.h" - -#define TAG 256 -#define STATE_T m256 -#include "sidecar_generic.h" - - -static never_inline -void sidecarExec_N(const struct sidecar_N *n, const u8 *b, size_t len, - struct sidecar_enabled_N *enabled, - UNUSED struct sidecar_scratch *scratch, - u64a base_offset, SidecarCallback cb, void *context) { - DEBUG_PRINTF("N: %hhu %hhu nc %hhu\n", n->c, b[0], n->nocase); - if (!enabled->bits) { - return; - } - - const u8 *loc = vermicelliExec(n->c, n->nocase, b, b + len); - - if (loc == b + len) { - return; - } - - enabled->bits = 0; - for (u32 i = 0; i < n->report_count; i++) { - cb(loc - b + base_offset, n->reports[i], context); - } -} - -static really_inline -void sidecarEnabledInit_N(struct sidecar_enabled *enabled) { - struct sidecar_enabled_N *e = (void *)enabled; - e->bits = 0; -} - -static really_inline -void sidecarExec_i_S(UNUSED const struct sidecar_S *n, - UNUSED const u8 *b, UNUSED size_t len, - UNUSED struct sidecar_enabled_S *enabled, - UNUSED u64a base_offset, UNUSED SidecarCallback cb, - UNUSED void *context) { - if (!enabled->bits) { - DEBUG_PRINTF("bail early, bail often\n"); - return; - } - - u8 state; - if (len >= 16) { - state = sidecarExec_S_int(n, b, len, enabled->bits); - } else { - const u8 *lo = (const u8 *)&n->lo; - const u8 *hi = (const u8 *)&n->hi; - state = enabled->bits; - for (u32 i = 0; i < len; i++) { - u8 c = b[i]; - state &= lo[c & 0xf] | hi[c >> 4]; - } - } - - state = ~state & enabled->bits; - if (!state) { - DEBUG_PRINTF("bail\n"); - return; - } - - enabled->bits &= ~state; - DEBUG_PRINTF("s = %02hhx e = %02hhx\n", state, enabled->bits); - u8 unshared = n->unshared_mask; - const u8 *masks = sidecar_ids_to_mask_const(n); - const struct sidecar_id_offset *id_map = n->id_list; - while (state) { - u32 bit = findAndClearLSB_8(&state); - DEBUG_PRINTF("found bit %u\n", bit); - const u32 *id_base = (const u32 *)((const char *)n - + id_map[bit].first_offset); - u32 count = id_map[bit].count; - for (u32 i = 0; i < count; ++i) { - DEBUG_PRINTF("firing %u\n", id_base[i]); - cb(base_offset, id_base[i], context); - enabled->bits &= ~(masks[id_base[i]] & unshared); - } - } - DEBUG_PRINTF("s = %02hhx e = %02hhx\n", state, enabled->bits); -} - -static really_inline -void 
sidecarEnabledInit_S(struct sidecar_enabled *enabled) { - struct sidecar_enabled_S *e = (void *)enabled; - e->bits = 0; -} - -static never_inline -void sidecarExec_S(const struct sidecar_S *n, const u8 *b, size_t len, - struct sidecar_enabled_S *enabled, - UNUSED struct sidecar_scratch *scratch, - u64a base_offset, SidecarCallback cb, void *context) { - if (len > 1) { - sidecarExec_i_S(n, b + 1, len - 1, enabled, base_offset + 1, cb, - context); - } - - u8 bits = enabled->bits; /* first byte doesn't change enabled */ - sidecarExec_i_S(n, b, 1, enabled, base_offset, cb, context); - enabled->bits = bits; -} - -void sidecarExec(const struct sidecar *n, const u8 *buffer, size_t len, - struct sidecar_enabled *enabled, - UNUSED struct sidecar_scratch *scratch, u64a base_offset, - SidecarCallback cb, void *ctxt) { - assert(n); - assert(enabled); - assert(len); - - assert(ISALIGNED_N(n, 16)); - assert(ISALIGNED_N(scratch, 16)); - - if (!len) { - return; - } - -#define EXEC_CASE(tag) \ - case SIDECAR_##tag: \ - sidecarExec_##tag((const struct sidecar_##tag *)n, buffer, len, \ - (struct sidecar_enabled_##tag *)enabled, scratch, \ - base_offset, cb, ctxt); \ - break; - - switch(n->type) { - EXEC_CASE(8) - EXEC_CASE(32) - EXEC_CASE(64) - EXEC_CASE(128) - EXEC_CASE(256) - EXEC_CASE(N) - EXEC_CASE(S) - default: - assert(0); - } - -#undef EXEC_CASE -} - -void sidecarEnabledInit(const struct sidecar *n, - struct sidecar_enabled *enabled) { - switch(n->type) { - case SIDECAR_8: - sidecarEnabledInit_8(enabled); - break; - case SIDECAR_32: - sidecarEnabledInit_32(enabled); - break; - case SIDECAR_64: - sidecarEnabledInit_64(enabled); - break; - case SIDECAR_128: - sidecarEnabledInit_128(enabled); - break; - case SIDECAR_256: - sidecarEnabledInit_256(enabled); - break; - case SIDECAR_N: - sidecarEnabledInit_N(enabled); - break; - case SIDECAR_S: - sidecarEnabledInit_S(enabled); - break; - default: - assert(0); - } -} - -u32 sidecarScratchSize(const struct sidecar *n) { - u32 width; - - switch(n->type) { - case SIDECAR_8: - width = sizeof(struct sidecar_mr_8); - break; - case SIDECAR_32: - width = sizeof(struct sidecar_mr_32); - break; - case SIDECAR_64: - width = sizeof(struct sidecar_mr_64); - break; - case SIDECAR_128: - width = sizeof(struct sidecar_mr_128); - break; - case SIDECAR_256: - width = sizeof(struct sidecar_mr_256); - break; - case SIDECAR_N: - return 0; /* no scratch required for N */ - case SIDECAR_S: - width = sizeof(struct sidecar_mr_8); - break; - default: - assert(0); - return 0; - } - - /* + 1, for first byte offset */ - return width * (n->mask_bit_count + 1); -} - -static really_inline -void sidecarEnabledUnion_N(struct sidecar_enabled *dest, - const struct sidecar_enabled *src) { - struct sidecar_enabled_N *d = (void *)dest; - const struct sidecar_enabled_N *s = (const void *)src; - d->bits |= s->bits; -} - -static really_inline -void sidecarEnabledUnion_S(struct sidecar_enabled *dest, - const struct sidecar_enabled *src) { - struct sidecar_enabled_S *d = (void *)dest; - const struct sidecar_enabled_S *s = (const void *)src; - d->bits |= s->bits; -} - -void sidecarEnabledUnion(const struct sidecar *n, struct sidecar_enabled *dest, - const struct sidecar_enabled *src) { - switch(n->type) { - case SIDECAR_8: - sidecarEnabledUnion_8(dest, src); - break; - case SIDECAR_32: - sidecarEnabledUnion_32(dest, src); - break; - case SIDECAR_64: - sidecarEnabledUnion_64(dest, src); - break; - case SIDECAR_128: - sidecarEnabledUnion_128(dest, src); - break; - case SIDECAR_256: - 
sidecarEnabledUnion_256(dest, src); - break; - case SIDECAR_N: - sidecarEnabledUnion_N(dest, src); - break; - case SIDECAR_S: - sidecarEnabledUnion_S(dest, src); - break; - default: - assert(0); - } -} diff --git a/src/sidecar/sidecar.h b/src/sidecar/sidecar.h deleted file mode 100644 index e0206de82..000000000 --- a/src/sidecar/sidecar.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SIDECAR_H -#define SIDECAR_H - -#include "ue2common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct sidecar; -struct sidecar_enabled; -struct sidecar_scratch; - -/* - * Sidecar is guaranteed to return the first match of a given id. However, in - * various cases later matches may also be returned, as may matches for disabled - * ids - */ -typedef void (*SidecarCallback)(u64a offset, u32 id, void *context); - -void sidecarExec(const struct sidecar *n, const u8 *buffer, size_t len, - struct sidecar_enabled *enabled, - struct sidecar_scratch *sidecar_scratch, - u64a base_offset, SidecarCallback cb, void *context); - -u32 sidecarScratchSize(const struct sidecar *n); - -void sidecarEnabledInit(const struct sidecar *n, - struct sidecar_enabled *enabled); - -/* Note: sidecar literals need to be reenabled after they match. - * This is purely because this behaviour is handy for rose. 
- * In rose, they always set their roles when fired (never have to postpone due - * to history) and if cleared their preds are also cleared so a pred would also - * have to match again before we need to care about them again - */ -void sidecarEnabledUnion(const struct sidecar *n, struct sidecar_enabled *dest, - const struct sidecar_enabled *src); - -#define ID_TERMINATOR (~0U) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/sidecar/sidecar_compile.cpp b/src/sidecar/sidecar_compile.cpp deleted file mode 100644 index 30ac032e3..000000000 --- a/src/sidecar/sidecar_compile.cpp +++ /dev/null @@ -1,600 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "sidecar_compile.h" -#include "sidecar_internal.h" -#include "ue2common.h" -#include "nfa/shufticompile.h" -#include "util/alloc.h" -#include "util/charreach.h" -#include "util/simd_utils.h" -#include "util/verify_types.h" - -#include -#include -#include - -#include - -using namespace std; -using boost::adaptors::map_values; -using boost::adaptors::map_keys; - -namespace ue2 { - -static -void prune(array, N_CHARS> &by_char, u32 p, - map> *impl_classes) { - CharReach cr; - assert(!by_char[p].empty()); - - for (u32 i = 0; i < N_CHARS; i++) { - if (by_char[i] == by_char[p]) { - cr.set(i); - } - } - - assert(impl_classes->find(cr) == impl_classes->end()); - (*impl_classes)[cr] = by_char[p]; - - for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) { - by_char[i].clear(); - } - -} - -static really_inline -void set_bit(u8 *a, size_t i) { - assert(i < 8); - *a |= 1U << i; -} - -static really_inline -void set_bit(u32 *a, size_t i) { - assert(i < 32); - *a |= 1U << i; -} - -static really_inline -void set_bit(u64a *a, size_t i) { - assert(i < 64); - *a |= 1ULL << i; -} - -static really_inline -void set_bit(m128 *a, size_t i) { - setbit128(a, i); -} - -static really_inline -void set_bit(m256 *a, size_t i) { - setbit256(a, i); -} - -template -static really_inline -void flip(s *v) { - *v = ~*v; -} - -static really_inline -void flip(m128 *v) { - *v = not128(*v); -} - -static really_inline -void flip(m256 *v) { - *v = not256(*v); -} - -template -static really_inline -void or_into_mask(s *a, const s b) { - *a |= b; -} - -static really_inline -void or_into_mask(m128 *a, const m128 b) { - *a = or128(*a, b); -} - -static really_inline -void or_into_mask(m256 *a, const m256 b) { - *a = or256(*a, b); -} - -template struct sidecar_traits { }; -#define MAKE_TRAITS(type_id, base_type_in, mask_bits) \ - template<> struct sidecar_traits { \ - typedef base_type_in base_type; \ - static const u32 bits = mask_bits; \ - typedef sidecar_##mask_bits impl_type; \ - typedef sidecar_enabled_##mask_bits enabled_type; \ - }; - -MAKE_TRAITS(SIDECAR_8, u8, 8) -MAKE_TRAITS(SIDECAR_32, u32, 32) -MAKE_TRAITS(SIDECAR_64, u64a, 64) -MAKE_TRAITS(SIDECAR_128, m128, 128) -MAKE_TRAITS(SIDECAR_256, m256, 256) - -template<> struct sidecar_traits { - typedef sidecar_N impl_type; -}; - -template<> struct sidecar_traits { - typedef u8 base_type; - typedef sidecar_S impl_type; -}; - -/* builds the main char reach table */ -template -static -void populateTable(const map> &impl_classes, - typename sidecar_traits::impl_type *ns) { - assert(impl_classes.size() - <= sizeof(typename sidecar_traits::base_type) * 8); - - u32 b = 0; - for (const CharReach &cr : impl_classes | map_keys) { - for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) { - set_bit(&ns->reach[i], b); - } - b++; - } - - for (u32 i = 0; i < N_CHARS; i++) { - flip(&ns->reach[i]); - } -} - -/* builds the table controlling which bits in the mask to turn on for each - * external id */ -template -static -void populateIdMasks(const map> &impl_classes, - typename sidecar_traits::impl_type *ns) { - typedef typename sidecar_traits::base_type base; - base *table = (base *)((char *)ns + sizeof(*ns)); - u32 b = 0; - for (const set &id_list : impl_classes | map_values) { - for (const u32 id : id_list) { - set_bit(&table[id], b); - } - if (id_list.size() == 1) { - set_bit(&ns->unshared_mask, b); - } - b++; - } -} - -/* builds the lists of ids to report for each set bit */ -template -static -void populateMaskInfo(const map> &impl_classes, - u32 
num_ext_classes, - typename sidecar_traits::impl_type *ns, - sidecar_id_offset *mask_info) { - typedef typename sidecar_traits::base_type base; - - u32 *curr_ptr = (u32 *)((char *)ns + sizeof(*ns) - + sizeof(base) * num_ext_classes); - curr_ptr = ROUNDUP_PTR(curr_ptr, sizeof(u32)); - - u32 b = 0; - for (const set &id_list : impl_classes | map_values) { - mask_info[b].first_offset = verify_u32((char *)curr_ptr - (char *)ns); - mask_info[b].count = verify_u32(id_list.size()); - for (const u32 id : id_list) { - *curr_ptr = id; - curr_ptr++; - } - b++; - } -} - -static -size_t calcIdListSize(const map> &impl_classes) { - size_t id_count = 0; - for (const auto &id_list : impl_classes | map_values) { - id_count += id_list.size(); - } - - return id_count * sizeof(u32); -} - -template -static -aligned_unique_ptr construct(const vector &ext_classes, - const map > &impl_classes_in, - bool allow_collapse) { - if (impl_classes_in.size() > sidecar_traits::bits) { - return nullptr; - } - - map> impl_classes_loc; - const map> *impl_classes; - - if (ext_classes.size() <= sidecar_traits::bits) { - /* we can directly map internal bits to external ids; no need for - * indirection */ - for (u32 i = 0; i < ext_classes.size(); i++) { - impl_classes_loc[ext_classes[i]].insert(i); - } - - impl_classes = &impl_classes_loc; - } else { - /* TODO: spread classes out if possible */ - if (!allow_collapse) { - return nullptr; - } - impl_classes = &impl_classes_in; - } - - typedef typename sidecar_traits::base_type base; - typedef typename sidecar_traits::impl_type impl; - - u32 id_count = verify_u32(ext_classes.size()); - size_t total_id_list_size = calcIdListSize(*impl_classes); - size_t size = sizeof(impl) + id_count * sizeof(base); /* ids -> masks */ - size = ROUNDUP_N(size, sizeof(u32)); - size += total_id_list_size; - DEBUG_PRINTF("allocated %zu\n", size); - - auto s = aligned_zmalloc_unique(size); - assert(s); // otherwise we would have thrown std::bad_alloc - impl *ns = (impl *)(s.get()); - - ns->header.type = s_type; - ns->header.size = size; - ns->header.id_count = id_count; - ns->header.mask_bit_count = verify_u32(impl_classes->size()); - - populateTable(*impl_classes, ns); - populateIdMasks(*impl_classes, ns); - populateMaskInfo(*impl_classes, id_count, ns, ns->id_list); - - return s; -} - -static -bool isNoodable(const CharReach &cr) { - return cr.count() == 1 || (cr.count() == 2 && cr.isBit5Insensitive()); -} - -template <> -aligned_unique_ptr -construct(const vector &ext_classes, - const map> &impl_classes, - bool) { - if (impl_classes.size() != 1 || !isNoodable(impl_classes.begin()->first)) { - return nullptr; - } - - const CharReach &cr = impl_classes.begin()->first; - const set &reports = impl_classes.begin()->second; - - u32 id_count = verify_u32(ext_classes.size()); - size_t size = sizeof(sidecar_N) + sizeof(u32) * reports.size(); - DEBUG_PRINTF("allocated %zu\n", size); - - auto s = aligned_zmalloc_unique(size); - assert(s); // otherwise we would have thrown std::bad_alloc - sidecar_N *ns = (sidecar_N *)(s.get()); - - ns->header.type = SIDECAR_N; - ns->header.size = size; - ns->header.id_count = id_count; - ns->header.mask_bit_count = verify_u32(impl_classes.size()); - - ns->c = cr.find_first(); - ns->nocase = cr.isBit5Insensitive(); - - ns->report_count = verify_u32(reports.size()); - u32 *p = ns->reports; - for (u32 report : reports) { - *p = report; - } - - return s; -} - -static -void flipShuftiMask(m128 *a) { - *a = not128(*a); -} - -template <> -aligned_unique_ptr -construct(const vector 
&ext_classes, - const map> &impl_classes, - bool) { - u32 id_count = verify_u32(ext_classes.size()); - size_t total_id_list_size = calcIdListSize(impl_classes); - size_t size = sizeof(sidecar_S) - + id_count * sizeof(u8); /* ids -> masks */ - size = ROUNDUP_N(size, sizeof(u32)); - size += total_id_list_size; - DEBUG_PRINTF("allocated %zu\n", size); - - auto s = aligned_zmalloc_unique(size); - assert(s); // otherwise we would have thrown std::bad_alloc - sidecar_S *ns = (sidecar_S *)(s.get()); - - ns->header.type = SIDECAR_S; - ns->header.size = size; - ns->header.id_count = id_count; - - vector shuf_bit_to_impl; - - /* populate the shufti masks */ - u32 used_bits = 0; - for (const CharReach &cr : impl_classes | map_keys) { - m128 lo, hi; - int bits = shuftiBuildMasks(cr, &lo, &hi); - - if (bits < 0 || used_bits + bits > 8) { - return nullptr; - } - - mergeShuftiMask(&ns->lo, lo, used_bits); - mergeShuftiMask(&ns->hi, hi, used_bits); - for (u32 i = used_bits; i < used_bits + bits; i++) { - shuf_bit_to_impl.push_back(&cr); - } - used_bits += bits; - } - - flipShuftiMask(&ns->lo); /* we are shift-or around here */ - flipShuftiMask(&ns->hi); - ns->header.mask_bit_count = used_bits; - - /* populate the enable masks */ - u8 *table = (u8 *)((char *)ns + sizeof(*ns)); - u32 b = 0; - for (const CharReach *cr : shuf_bit_to_impl) { - const set &rep_set = impl_classes.find(*cr)->second; - for (u32 report : rep_set) { - set_bit(&table[report], b); - } - if (rep_set.size() == 1) { - set_bit(&ns->unshared_mask, b); - } - b++; - } - - /* populate the report id masks */ - sidecar_id_offset temp_id_list[8]; - populateMaskInfo(impl_classes, id_count, ns, temp_id_list); - - u32 i = 0, j = 0; - auto iit = impl_classes.begin(); - while (i < shuf_bit_to_impl.size()) { - assert(iit != impl_classes.end()); - if (shuf_bit_to_impl[i] == &iit->first) { - ns->id_list[i] = temp_id_list[j]; - i++; - } else { - j++; - ++iit; - } - } - - return s; -} - -static -aligned_unique_ptr -constructWithHint(int hint, const vector &classes, - const map> &impl_classes) { - switch (hint) { - case SIDECAR_8: - return construct(classes, impl_classes, true); - case SIDECAR_32: - return construct(classes, impl_classes, true); - case SIDECAR_64: - return construct(classes, impl_classes, true); - case SIDECAR_128: - return construct(classes, impl_classes, true); - case SIDECAR_256: - return construct(classes, impl_classes, true); - case SIDECAR_N: - return construct(classes, impl_classes, true); - case SIDECAR_S: - return construct(classes, impl_classes, true); - default: - DEBUG_PRINTF("derp\n"); - assert(0); - return nullptr; - } -} - -aligned_unique_ptr sidecarCompile(const vector &classes, - int hint) { - array, N_CHARS> by_char; - - for (u32 i = 0; i < classes.size(); i++) { - const CharReach &cr = classes[i]; - for (size_t j = cr.find_first(); j != cr.npos; j = cr.find_next(j)) { - by_char[j].insert(i); - } - } - - map> impl_classes; - - bool changed; - do { - changed = false; - u32 smallest = N_CHARS; - for (u32 i = 0; i < N_CHARS; i++) { - if (by_char[i].empty()) { - continue; - } - - if (by_char[i].size() == 1) { - prune(by_char, i, &impl_classes); - changed = true; - } else if (smallest == N_CHARS || - by_char[i].size() < by_char[smallest].size()) { - smallest = i; - } - } - - if (!changed && smallest != N_CHARS) { - prune(by_char, smallest, &impl_classes); - changed = true; - } - } while (changed); - - DEBUG_PRINTF("matching %zu classes; %zu impl classes\n", classes.size(), - impl_classes.size()); - assert(impl_classes.size() <= 
N_CHARS); - - if (hint != SIDECAR_NO_HINT) { - return constructWithHint(hint, classes, impl_classes); - } - - aligned_unique_ptr (*facts[])(const vector &, - const map > &, bool) = { - construct, - // construct, TODO: first offset stuff for S - construct, - construct, - construct, - construct, - construct - }; - - for (u32 i = 0; i < ARRAY_LENGTH(facts); i++) { - auto sc = facts[i](classes, impl_classes, false); - if (sc) { - return sc; - } - } - - for (u32 i = 0; i < ARRAY_LENGTH(facts); i++) { - auto sc = facts[i](classes, impl_classes, true); - if (sc) { - return sc; - } - } - - return nullptr; -} - -u32 sidecarSize(const sidecar *ns) { - return ns->size; -} - -u32 sidecarEnabledSize(const sidecar *n) { - switch (n->type) { - case SIDECAR_8: - return sizeof(struct sidecar_enabled_8); - case SIDECAR_32: - return sizeof(struct sidecar_enabled_32); - case SIDECAR_64: - return sizeof(struct sidecar_enabled_64); - case SIDECAR_128: - return sizeof(struct sidecar_enabled_128); - case SIDECAR_256: - return sizeof(struct sidecar_enabled_256); - case SIDECAR_N: - return sizeof(struct sidecar_enabled_N); - case SIDECAR_S: - return sizeof(struct sidecar_enabled_S); - default: - assert(0); - } - return 0; -} - -template -static -void sidecarEnabledAdd_int(const sidecar *nn, struct sidecar_enabled *enabled, - u32 id) { - typedef typename sidecar_traits::enabled_type e_type; - typedef typename sidecar_traits::impl_type n_type; - e_type *e = (e_type *)enabled; - const n_type *n = (const n_type *)nn; - - DEBUG_PRINTF("enabling %u\n", id); - typedef typename sidecar_traits::base_type base; - const base *masks = (const base *)sidecar_ids_to_mask_const(n); - or_into_mask(&e->bits, masks[id]); -} - -template<> -void sidecarEnabledAdd_int(const sidecar *nn, - sidecar_enabled *enabled, u32 id) { - const sidecar_S *n = (const sidecar_S *)nn; - sidecar_enabled_S *e = (sidecar_enabled_S *)enabled; - const u8 *masks = (const u8 *)sidecar_ids_to_mask_const(n); - e->bits |= masks[id]; -} - -template<> -void sidecarEnabledAdd_int(UNUSED const sidecar *n, - struct sidecar_enabled *enabled, - UNUSED u32 id) { - sidecar_enabled_N *e = (sidecar_enabled_N *)enabled; - /* assuming we are not being called by a complete idiot, there is only one - * thing we could be asked to do here */ - e->bits = 1; -} - -void sidecarEnabledAdd(const sidecar *n, struct sidecar_enabled *enabled, - u32 id) { - DEBUG_PRINTF("enabling %hhu:%u\n", n->type, id); - switch (n->type) { - case SIDECAR_8: - sidecarEnabledAdd_int(n, enabled, id); - break; - case SIDECAR_32: - sidecarEnabledAdd_int(n, enabled, id); - break; - case SIDECAR_64: - sidecarEnabledAdd_int(n, enabled, id); - break; - case SIDECAR_128: - sidecarEnabledAdd_int(n, enabled, id); - break; - case SIDECAR_256: - sidecarEnabledAdd_int(n, enabled, id); - break; - case SIDECAR_N: - sidecarEnabledAdd_int(n, enabled, id); - break; - case SIDECAR_S: - sidecarEnabledAdd_int(n, enabled, id); - break; - default: - assert(0); - } -} - -} // namespace ue2 diff --git a/src/sidecar/sidecar_compile.h b/src/sidecar/sidecar_compile.h deleted file mode 100644 index 2a8c2c32c..000000000 --- a/src/sidecar/sidecar_compile.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SIDECAR_COMPILE_H -#define SIDECAR_COMPILE_H - -#include "ue2common.h" -#include "util/alloc.h" - -#include -#include - -struct sidecar; -struct sidecar_enabled; - -namespace ue2 { - -class CharReach; - -#define SIDECAR_NO_HINT (-1) - -/* - * match ids are given by position in the report_map vector - */ -aligned_unique_ptr -sidecarCompile(const std::vector &classes, - int hint = SIDECAR_NO_HINT); - -u32 sidecarSize(const sidecar *ns); -u32 sidecarEnabledSize(const sidecar *n); -void sidecarEnabledAdd(const sidecar *n, struct sidecar_enabled *enabled, - u32 id); - -} // namespace ue2 - -#endif diff --git a/src/sidecar/sidecar_dump.cpp b/src/sidecar/sidecar_dump.cpp deleted file mode 100644 index dfcf4dd2d..000000000 --- a/src/sidecar/sidecar_dump.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include "sidecar_dump.h" -#include "sidecar_internal.h" -#include "ue2common.h" - -#include - -#ifndef DUMP_SUPPORT -#error No dump support! -#endif - -namespace ue2 { - -static -void dumpSideShuf(const sidecar_S *s, FILE *f) { - fprintf(f, "lo:"); - for (u32 i = 0; i < 16; i++) { - fprintf(f, " %02hhx", ((const u8 *)&s->lo)[i]); - } - fprintf(f, "\n"); - - fprintf(f, "hi:"); - for (u32 i = 0; i < 16; i++) { - fprintf(f, " %02hhx", ((const u8 *)&s->hi)[i]); - } - fprintf(f, "\n"); - - const u8 *enables = (const u8 *)sidecar_ids_to_mask_const(s); - fprintf(f, "shufti masks per id\n"); - for (u32 i = 0; i < s->header.id_count; i++) { - fprintf(f, "%u: %02hhx\n", i, enables[i]); - } -} - -void sidecarDump(const sidecar *s, FILE *f) { - const char *type = "?"; - switch(s->type) { - case SIDECAR_8: - type = "8"; - break; - case SIDECAR_32: - type = "32"; - break; - case SIDECAR_64: - type = "64"; - break; - case SIDECAR_128: - type = "128"; - break; - case SIDECAR_256: - type = "256"; - break; - case SIDECAR_N: - type = "N"; - break; - case SIDECAR_S: - type = "S"; - break; - default: - assert(0); - } - - fprintf(f, "Sidecar: %s\n", type); - fprintf(f, " size: %u\n", s->size); - fprintf(f, " used bits: %u\n", s->mask_bit_count); - fprintf(f, " ids: %u\n", s->id_count); - if (s->type == SIDECAR_S) { - dumpSideShuf((const sidecar_S *)s, f); - } -} - -} // namespace ue2 diff --git a/src/sidecar/sidecar_dump.h b/src/sidecar/sidecar_dump.h deleted file mode 100644 index 8b6625f40..000000000 --- a/src/sidecar/sidecar_dump.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SIDECAR_DUMP_H -#define SIDECAR_DUMP_H - -#if defined(DUMP_SUPPORT) - -#include - -struct sidecar; - -namespace ue2 { - -void sidecarDump(const sidecar *s, FILE *f); - -} // namespace ue2 - -#endif - -#endif diff --git a/src/sidecar/sidecar_generic.h b/src/sidecar/sidecar_generic.h deleted file mode 100644 index f5788feca..000000000 --- a/src/sidecar/sidecar_generic.h +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* in param TAG, STATE_T */ - -#include "util/join.h" - -#if TAG == 8 -#define ISTATE_T u32 -#else -#define ISTATE_T STATE_T -#endif - -#define EXEC_FN JOIN(sidecarExec_, TAG) -#define EXEC_I_FN JOIN(sidecarExec_i_, TAG) -#define ENABLED_INIT_FN JOIN(sidecarEnabledInit_, TAG) -#define ENABLED_UNION_FN JOIN(sidecarEnabledUnion_, TAG) -#define ENABLED_STRUCT JOIN(struct sidecar_enabled_, TAG) -#define PLAY_CB_FB JOIN(sidecarPlayCallbacks_, STATE_T) -#define SIDECAR_STRUCT JOIN(struct sidecar_, TAG) -#define MR_STRUCT JOIN(struct sidecar_mr_, TAG) -#define load_state JOIN(load_, STATE_T) -#define store_state JOIN(store_, STATE_T) -#define and_state JOIN(and_, STATE_T) -#define iand_state JOIN(and_, ISTATE_T) -#define andnot_state JOIN(andnot_, STATE_T) -#define or_state JOIN(or_, STATE_T) -#define is_zero JOIN(isZero_, STATE_T) -#define iis_zero JOIN(isZero_, ISTATE_T) -#define is_not_zero JOIN(isNonZero_, ISTATE_T) -#define not_eq JOIN(noteq_, STATE_T) -#define inot_eq JOIN(noteq_, ISTATE_T) -#define find_and_clear_lsb JOIN(findAndClearLSB_, TAG) -#define zero_state JOIN(zero_, ISTATE_T) - -#if TAG <= 64 -#define TDEBUG_PRINTF(...) DEBUG_PRINTF(__VA_ARGS__) -#define ATDEBUG_PRINTF(...) ADEBUG_PRINTF(__VA_ARGS__) -#else -#define TDEBUG_PRINTF(...) do { } while(0) -#define ATDEBUG_PRINTF(...) do { } while(0) -#endif - -MR_STRUCT { - const u8 *loc; - STATE_T mask; -}; - -static really_inline -void PLAY_CB_FB(const SIDECAR_STRUCT *n, const u8 *b, const MR_STRUCT *matches, - u32 match_len, ENABLED_STRUCT *enabled, u64a base_offset, - SidecarCallback cb, void *context) { - const STATE_T *id_mask_map = sidecar_ids_to_mask_const(n); - const struct sidecar_id_offset *id_map = n->id_list; - - STATE_T e_local = load_state(&enabled->bits); - - DEBUG_PRINTF("playing %u matches\n", match_len); - TDEBUG_PRINTF("enabled %08llu\n", (u64a)enabled->bits); - - for (u32 i = 0; i < match_len; i++) { - u64a offset = matches[i].loc - b + base_offset; - DEBUG_PRINTF("match at %llu\n", offset); - - STATE_T local_m = andnot_state(load_state(&matches[i].mask), e_local); - - e_local = and_state(matches[i].mask, e_local); - - TDEBUG_PRINTF("%08llu=~%08llu^%08llu\n", (u64a)local_m, - (u64a)matches[i].mask, (u64a)e_local); - - while (is_not_zero(local_m)) { - u32 bit = find_and_clear_lsb(&local_m); - DEBUG_PRINTF("bit %u at %llu\n", bit, offset); - const u32 *id_base = (const u32 *) - ((const char *)n + id_map[bit].first_offset); - assert(ISALIGNED_N(id_base, 4)); - u32 count = id_map[bit].count; - for (u32 j = 0; j < count; ++j) { - cb(offset, id_base[j], context); - STATE_T u_local = and_state(id_mask_map[id_base[j]], - load_state(&n->unshared_mask)); - DEBUG_PRINTF("squashing unshared???\n"); - e_local = andnot_state(u_local, e_local); - local_m = andnot_state(u_local, local_m); - } - } - } - - TDEBUG_PRINTF("enabled %08llu\n", (u64a)e_local); - store_state(&enabled->bits, e_local); -} - -/* returns count of match locations */ -static really_inline -MR_STRUCT *EXEC_I_FN(const SIDECAR_STRUCT *n, const u8 *b, const u8 *b_end, - STATE_T state_in, MR_STRUCT *matches) { - DEBUG_PRINTF("running over %zu\n", b_end - b); - const STATE_T *table = (const STATE_T *)&n->reach; - ISTATE_T s = state_in; - - b_end--; /* last byte is unrolled at end of function */ - for (; b < b_end; b++) { - u8 c = *b; - ISTATE_T r = table[c]; - ISTATE_T s1 = iand_state(s, r); - if (inot_eq(s1, s)) { - TDEBUG_PRINTF("recording match %08llu\n", (u64a)s1); - matches->loc = b; - store_state(&matches->mask, s1); - matches++; - if (DO_DEAD_CHECK && 
iis_zero(s1)) { - goto done; - } - } - s = s1; - } - - /* do final byte by itself; gain blessing from the gcc gods */ - u8 c = *b; - ISTATE_T r = table[c]; - ISTATE_T s1 = iand_state(s, r); - if (inot_eq(s1, s)) { - TDEBUG_PRINTF("recording match %08llu\n", (u64a)s1); - matches->loc = b; - matches->mask = s1; - matches++; - } - -done: - return matches; -} - -static never_inline -void EXEC_FN(const SIDECAR_STRUCT *n, const u8 *b, size_t len, - ENABLED_STRUCT *enabled, struct sidecar_scratch *scratch, - u64a base_offset, SidecarCallback cb, void *context) { - STATE_T e_local = load_state(&enabled->bits); - if (is_zero(e_local)) { - return; - } - - MR_STRUCT *matches = (MR_STRUCT *)scratch; - DEBUG_PRINTF("running sidecar over %zu len\n", len); - DEBUG_PRINTF("enabled %p scratch %p\n", enabled, scratch); - TDEBUG_PRINTF("enabled %08llu\n", (u64a)enabled->bits); - MR_STRUCT *matches_out = EXEC_I_FN(n, b, b + len, e_local, matches); - TDEBUG_PRINTF("enabled %08llu\n", (u64a)enabled->bits); - if (matches_out - matches) { - PLAY_CB_FB(n, b, matches, matches_out - matches, enabled, base_offset, - cb, context); - } - - TDEBUG_PRINTF("enabled %08llu\n", (u64a)enabled->bits); -} - -static really_inline -void ENABLED_INIT_FN(struct sidecar_enabled *enabled) { - ENABLED_STRUCT *e = (void *)enabled; - store_state(&e->bits, zero_state); -} - -static really_inline -void ENABLED_UNION_FN(struct sidecar_enabled *dest, - const struct sidecar_enabled *src) { - ENABLED_STRUCT *d = (void *)dest; - const ENABLED_STRUCT *s = (const void *)src; - store_state(&d->bits, or_state(load_state(&d->bits), load_state(&s->bits))); -} - - -#undef ENABLED_STRUCT -#undef ENABLED_INIT_FN -#undef ENABLED_UNION_FN -#undef EXEC_FN -#undef EXEC_I_FN -#undef load_state -#undef MR_STRUCT -#undef PLAY_CB_FB -#undef SIDECAR_STRUCT -#undef store_state -#undef and_state -#undef iand_state -#undef andnot_state -#undef not_eq -#undef inot_eq -#undef or_state -#undef is_zero -#undef is_not_zero -#undef zero_state - -#undef TDEBUG_PRINTF -#undef ATDEBUG_PRINTF - -#undef ISTATE_T - -#undef TAG -#undef STATE_T diff --git a/src/sidecar/sidecar_internal.h b/src/sidecar/sidecar_internal.h deleted file mode 100644 index d1725355c..000000000 --- a/src/sidecar/sidecar_internal.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SIDECAR_INTERNAL_H -#define SIDECAR_INTERNAL_H - -#include "ue2common.h" - -#define SIDECAR_8 0 -#define SIDECAR_32 1 -#define SIDECAR_64 2 -#define SIDECAR_128 3 -#define SIDECAR_256 4 -#define SIDECAR_N 5 -#define SIDECAR_S 6 - -struct sidecar_id_offset { - u32 first_offset; /* from base of sidecar */ - u32 count; -}; - -struct sidecar { - u8 type; - u32 size; - u32 id_count; - u32 mask_bit_count; -}; /* .. followed in memory by reach table */ - -#define SIDECAR_SPEC(bit_count, base_type) \ -struct sidecar_##bit_count { \ - struct sidecar header; \ - base_type reach[N_CHARS]; \ - struct sidecar_id_offset id_list[bit_count];\ - base_type unshared_mask; \ -}; - -struct sidecar_N { - struct sidecar header; - char c; - char nocase; - u32 report_count; - u32 reports[]; -}; - -struct sidecar_S { - struct sidecar header; - m128 hi; - m128 lo; - struct sidecar_id_offset id_list[8]; - u8 unshared_mask; -}; - -SIDECAR_SPEC(8, u8) -SIDECAR_SPEC(32, u32) -SIDECAR_SPEC(64, u64a) -SIDECAR_SPEC(128, m128) -SIDECAR_SPEC(256, m256) - -struct sidecar_enabled { - u8 null; -}; - -struct sidecar_enabled_8 { - u8 bits; -}; - -struct sidecar_enabled_32 { - u32 bits; -}; - -struct sidecar_enabled_64 { - u64a bits; -}; - -struct sidecar_enabled_128 { - m128 bits; -}; - -struct sidecar_enabled_256 { - m256 bits; -}; - -struct sidecar_enabled_N { - u8 bits; -}; - -struct sidecar_enabled_S { - u8 bits; -}; - -union sidecar_enabled_any { - struct sidecar_enabled arb; - struct sidecar_enabled_8 e8; - struct sidecar_enabled_32 e32; - struct sidecar_enabled_64 e64; - struct sidecar_enabled_128 e128; - struct sidecar_enabled_256 e256; - struct sidecar_enabled_N eN; - struct sidecar_enabled_S eS; -}; - -/* ASCII ART TIME - * - * non-noodle sidecars - * - * --------------------- - * [ struct sidecar ] ROUNDUP_16(sizeof(sidecar)) - * --------------------- | - * [ ] | Shufti: masks here - * [ reach table ] sizeof(N) * N_CHARS | - * [ ] | - * --------------------- - * [ bit->id list head ] N * sizeof(sidecar_id_offset) - * --------------------- - * --------------------- sizeof(sidecar_N) - * [ ] - * [ id->masks ] count(id) * sizeof(N) - * [ ] - * --------------------- - * [ ] - * [ id lists ] complicated * sizeof(report) - * [ ] - * --------------------- - */ - -#define sidecar_ids_to_mask_const(side_struct) \ - ((const void *)((const char *)side_struct + sizeof(*side_struct))) - - - -#endif diff --git a/src/sidecar/sidecar_shufti.c b/src/sidecar/sidecar_shufti.c deleted file mode 100644 index 51969ddcb..000000000 --- a/src/sidecar/sidecar_shufti.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "sidecar_shufti.h" -#include "sidecar_internal.h" -#include "ue2common.h" -#include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" - -#define GET_LO_4(chars) and128(chars, low4bits) -#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4) - -#ifdef DEBUG -#include -UNUSED static void dumpMsk(m128 msk) { - u8 *maskAsU8 = (u8 *)&msk; - for (int i = 0; i < 16; i++) { - printf("%02hhx ", maskAsU8[i]); - } -} - -UNUSED static void dumpMskAsChars(m128 msk) { - u8 *maskAsU8 = (u8 *)&msk; - for (int i = 0; i < 16; i++) { - u8 c = maskAsU8[i]; - if (isprint(c)) - printf("%c",c); - else - printf("."); - } -} -#endif - -static really_inline -u8 squash(m128 t) { - m128 u = byteShiftRight128(t, 8); - t = and128(t, u); - m128 v = byteShiftRight128(t, 4); - t = and128(t, v); - u32 gpr = movd(t); - gpr &= gpr >> 16; - gpr &= gpr >> 8; - DEBUG_PRINTF(" gpr: %02x\n", (u8)gpr); - return (u8)gpr; -} - - -static really_inline -m128 mainLoop(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); - m128 t = or128(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMskAsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk(t); printf("\n"); -#endif - - return t; -} - -u8 sidecarExec_S_int(const struct sidecar_S *n, const u8 *b, - size_t len, u8 state) { - const m128 low4bits = _mm_set1_epi8(0xf); - const u8 *b_end = b + len; - m128 mask_lo = n->lo; - m128 mask_hi = n->hi; - - // Preconditioning: most of the time our buffer won't be aligned - DEBUG_PRINTF("warmup %02hhx\n", state); - m128 chars = loadu128(b); - m128 t = _mm_set1_epi8(state); - t = and128(t, mainLoop(mask_lo, mask_hi, chars, low4bits)); - b = ROUNDUP_PTR(b + 1, 16); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. 
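/* A minimal scalar sketch of the nibble classification that mainLoop()
 * performs 16 bytes at a time with pshufb: a byte c stays live for a
 * tracked class only if that class's bit is set in both the low-nibble
 * and high-nibble tables. The function and table names here are
 * illustrative only, not part of the sidecar API. */
static unsigned char shufti_classify(unsigned char c,
                                     const unsigned char lo[16],
                                     const unsigned char hi[16]) {
    /* low nibble indexes lo[], high nibble indexes hi[]; AND the results */
    return (unsigned char)(lo[c & 0xf] & hi[c >> 4]);
}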
- - DEBUG_PRINTF("main %02hhx\n", state); - const u8 *last_block = b_end - 16; - while (b < last_block) { - m128 lchars = load128(b); - m128 rv = mainLoop(mask_lo, mask_hi, lchars, low4bits); - t = and128(t, rv); - b += 16; - if (!squash(t)) { - return 0; - } - } - - DEBUG_PRINTF("cool down %02hhx\n", state); - assert(b <= b_end && b >= b_end - 16); - // do an unaligned load the end to accurate picture to the end - chars = loadu128(b_end - 16); - m128 rv = mainLoop(mask_lo, mask_hi, chars, low4bits); - t = and128(t, rv); - - return squash(t); -} diff --git a/src/sidecar/sidecar_shufti.h b/src/sidecar/sidecar_shufti.h deleted file mode 100644 index 83e5e4302..000000000 --- a/src/sidecar/sidecar_shufti.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SIDECAR_SHUFTI_H -#define SIDECAR_SHUFTI_H - -#include "ue2common.h" - -struct sidecar_S; - -u8 sidecarExec_S_int(const struct sidecar_S *n, const u8 *b, size_t len, - u8 state_in); -#endif diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index a8925a3c9..c58f64f24 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -64,7 +64,6 @@ set(unit_internal_SOURCES internal/repeat.cpp internal/rose_build_merge.cpp internal/rvermicelli.cpp - internal/sidecar.cpp internal/simd_utils.cpp internal/shuffle.cpp internal/shufti.cpp diff --git a/unit/internal/sidecar.cpp b/unit/internal/sidecar.cpp deleted file mode 100644 index bd5179265..000000000 --- a/unit/internal/sidecar.cpp +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include "ue2common.h" -#include "sidecar/sidecar.h" -#include "sidecar/sidecar_compile.h" -#include "sidecar/sidecar_internal.h" -#include "util/alloc.h" -#include "util/charreach.h" - -#include -#include -#include "gtest/gtest.h" - -using namespace testing; -using namespace ue2; -using std::vector; -using std::set; -using std::tie; -using std::tuple; - -namespace { - -void ns_cb(UNUSED u64a offset, u32 id, void *ctxt) { - u32 *seen = (u32 *)ctxt; - *seen |= 1U << id; -} - -void set_cb(UNUSED u64a offset, u32 id, void *ctxt) { - set *seen = (set *)ctxt; - seen->insert(id); -} - -TEST(Sidecar, ns1) { - const size_t data_len = 1024; - u8 data[data_len]; - - CharReach c_1; - c_1.set('f'); - vector charclasses; - charclasses.push_back(c_1); - auto ns = sidecarCompile(charclasses); - - ASSERT_TRUE(ns != nullptr); - ASSERT_LT(0U, sidecarSize(ns.get())); - - auto enabled = - aligned_zmalloc_unique(sidecarEnabledSize(ns.get())); - sidecarEnabledInit(ns.get(), enabled.get()); - auto scratch = - aligned_zmalloc_unique(sidecarScratchSize(ns.get())); - - for (u32 i = 0; i < 256; i++) { - SCOPED_TRACE(i); - u32 seen = 0; - memset(data, i, data_len); - sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0, - ns_cb, &seen); - ASSERT_EQ(0U, seen); - } - - sidecarEnabledAdd(ns.get(), enabled.get(), 0); - - for (u32 i = 0; i < 256; i++) { - SCOPED_TRACE(i); - u32 seen = 0; - memset(data, i, data_len); - sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0, - ns_cb, &seen); - if (i == 'f') { - ASSERT_EQ(1U, seen); - } else { - ASSERT_EQ(0U, seen); - } - } -} - -const char* sidecarStrings[] = { - "f", - "a", - "A", - "ab", - "\r\n", // an old favourite - "\t\r\n", - " \r\n", - "xyz", - "z0y1", - "01234567", // 8 elements - "!@#$%^&*()", // 10 elements - "qwertyuiopasdfgh", // 16 elements - "qwertyuiopasdfghj", // 17 elements - "qwertyuiopasdfghjklzxcvb", // 24 elements - "qwertyuiopasdfghjklzxcvbnm012345", // 32 elements - "qwertyuiopasdfghjklzxcvbnm0123456" // 33 elements -}; - -const u32 sidecarModels[] = { - SIDECAR_8, - SIDECAR_32, - SIDECAR_64, - SIDECAR_128, - SIDECAR_256, - SIDECAR_N, - SIDECAR_S -}; - -// Number of elements we can handle in each model -const u32 sidecarSizes[] = { - 8, - 32, - 64, - 128, - 256, - 1, - 8 -}; - -// 
Parameterized test case for string of single-byte classes -class SidecarTest : public TestWithParam> { -protected: - virtual void SetUp() { - tie(model, chars) = GetParam(); - size_t num = strlen(chars); - charclasses.resize(num); - - for (size_t i = 0; i < num; i++) { - charclasses[i].set(chars[i]); - } - } - - virtual bool fitsModel() { - for (size_t i = 0; i < ARRAY_LENGTH(sidecarModels); i++) { - if (sidecarModels[i] == model) { - return charclasses.size() <= sidecarSizes[i]; - } - } - return false; - } - - u32 model; - const char *chars; - vector charclasses; -}; - -TEST_P(SidecarTest, Individual) { - SCOPED_TRACE(chars); - - // Skip this test if the model is too small - if (!fitsModel()) { - return; - } - - auto ns = sidecarCompile(charclasses, model); - if (!ns && model == SIDECAR_S) { /* shufti is fussi */ - return; - } - ASSERT_TRUE(ns != nullptr); - ASSERT_LT(0U, sidecarSize(ns.get())); - - auto enabled = - aligned_zmalloc_unique(sidecarEnabledSize(ns.get())); - sidecarEnabledInit(ns.get(), enabled.get()); - auto local_enabled = - aligned_zmalloc_unique(sidecarEnabledSize(ns.get())); - auto scratch = - aligned_zmalloc_unique(sidecarScratchSize(ns.get())); - - const size_t data_len = 1024; - u8 data[data_len]; - - // with nothing enabled, nothing should fire - for (u32 i = 0; i < 256; i++) { - SCOPED_TRACE(i); - memset(data, i, data_len); - set seen; - sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0, - set_cb, &seen); - ASSERT_TRUE(seen.empty()); - } - - // test that every char class fires when enabled separately - for (u32 j = 0; j < charclasses.size(); j++) { - u32 c = chars[j]; - SCOPED_TRACE(c); - - // build a "compile time" enabled structure and add class j to it. - sidecarEnabledInit(ns.get(), local_enabled.get()); - sidecarEnabledAdd(ns.get(), local_enabled.get(), j); - - // union class j into our runtime enabled structure. - sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get()); - - for (u32 i = 0; i < 256; i++) { - SCOPED_TRACE(i); - memset(data, i, data_len); - set seen; - sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), - 0, set_cb, &seen); - if (i == c) { - ASSERT_EQ(1U, seen.size()); - ASSERT_EQ(j, *seen.begin()); - } else { - ASSERT_TRUE(seen.empty()); - } - } - } -} - -TEST_P(SidecarTest, Together) { - SCOPED_TRACE(chars); - - // Skip this test if the model is too small - if (!fitsModel()) { - return; - } - - auto ns = sidecarCompile(charclasses, model); - if (!ns && model == SIDECAR_S) { /* shufti is fussi */ - return; - } - ASSERT_TRUE(ns != nullptr); - ASSERT_LT(0U, sidecarSize(ns.get())); - - auto enabled = - aligned_zmalloc_unique(sidecarEnabledSize(ns.get())); - sidecarEnabledInit(ns.get(), enabled.get()); - auto local_enabled = - aligned_zmalloc_unique(sidecarEnabledSize(ns.get())); - auto scratch = - aligned_zmalloc_unique(sidecarScratchSize(ns.get())); - - const size_t data_len = 1024; - u8 data[data_len]; - - // with nothing enabled, nothing should fire - for (u32 i = 0; i < 256; i++) { - SCOPED_TRACE(i); - memset(data, i, data_len); - set seen; - sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0, - set_cb, &seen); - ASSERT_TRUE(seen.empty()); - } - - // test that every char class fires - for (u32 j = 0; j < charclasses.size(); j++) { - // enable the whole lot - sidecarEnabledInit(ns.get(), enabled.get()); - for (u32 i = 0; i < charclasses.size(); i++) { - // build a "compile time" enabled structure and add class j to it. 
- sidecarEnabledInit(ns.get(), local_enabled.get()); - sidecarEnabledAdd(ns.get(), local_enabled.get(), i); - - // union class j into our runtime enabled structure. - sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get()); - } - - u32 c = chars[j]; - SCOPED_TRACE(c); - - for (u32 i = 0; i < 256; i++) { - SCOPED_TRACE(i); - memset(data, i, data_len); - set seen; - sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), - 0, set_cb, &seen); - if (i == c) { - // seen should contain only `c' - ASSERT_EQ(1U, seen.size()); - ASSERT_FALSE(seen.end() == seen.find(j)); - } else { - // seen should not contain `c', and either zero or one char can - // have matched - ASSERT_GT(2U, seen.size()); - ASSERT_TRUE(seen.end() == seen.find(j)); - } - } - } -} - -INSTANTIATE_TEST_CASE_P(Sidecar, SidecarTest, - Combine(ValuesIn(sidecarModels), - ValuesIn(sidecarStrings))); - -} From 9cb22335892b04ee1d86855fbe64bb91b927bb25 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 19 Nov 2015 09:32:05 +1100 Subject: [PATCH 007/218] rose: Use an interpreter for role runtime Replace much of the RoseRole structure with an interpreted program, simplifying the Rose runtime and making it much more flexible. --- CMakeLists.txt | 1 + src/rose/eod.c | 11 +- src/rose/match.c | 676 +++++++------- src/rose/match.h | 8 +- src/rose/rose_build_bytecode.cpp | 1414 ++++++++++++++++++------------ src/rose/rose_build_compile.cpp | 51 -- src/rose/rose_build_dump.cpp | 93 +- src/rose/rose_build_impl.h | 8 +- src/rose/rose_build_merge.cpp | 8 - src/rose/rose_build_misc.cpp | 3 - src/rose/rose_dump.cpp | 323 +++++-- src/rose/rose_internal.h | 81 +- src/rose/rose_program.h | 167 ++++ src/util/container.h | 9 +- 14 files changed, 1669 insertions(+), 1184 deletions(-) create mode 100644 src/rose/rose_program.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ff475955..f10e5cb5e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -470,6 +470,7 @@ set (hs_exec_SRCS src/rose/runtime.h src/rose/rose.h src/rose/rose_internal.h + src/rose/rose_program.h src/rose/rose_types.h src/rose/rose_common.h src/util/bitutils.h diff --git a/src/rose/eod.c b/src/rose/eod.c index 46605f93f..60bf2ea2c 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -169,11 +169,12 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset, /* mark role as handled so we don't touch it again in this walk */ fatbit_set(handled_roles, t->roleCount, role); - DEBUG_PRINTF("fire report for role %u, report=%u\n", role, - tr->reportId); - int rv = scratch->tctxt.cb(offset, tr->reportId, - scratch->tctxt.userCtx); - if (rv == MO_HALT_MATCHING) { + u64a som = 0; + int work_done = 0; + hwlmcb_rv_t rv = + roseRunRoleProgram(t, tr->programOffset, offset, &som, + &(scratch->tctxt), &work_done); + if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } } diff --git a/src/rose/match.c b/src/rose/match.c index d71cbe43e..ac9958668 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -31,6 +31,7 @@ #include "infix.h" #include "match.h" #include "miracle.h" +#include "rose_program.h" #include "rose.h" #include "som/som_runtime.h" #include "util/bitutils.h" @@ -319,22 +320,18 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, static rose_inline hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, - const struct RoseRole *tr, u64a som, + u32 qi, u32 top, u64a som, u64a end, struct RoseContext *tctxt, char in_anchored) { - DEBUG_PRINTF("woot we have a mask/follower/suffix/... 
role\n"); + DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); - assert(tr->suffixOffset); - - const struct NFA *nfa - = (const struct NFA *)((const char *)t + tr->suffixOffset); u8 *aa = getActiveLeafArray(t, tctxt->state); struct hs_scratch *scratch = tctxtToScratch(tctxt); - u32 aaCount = t->activeArrayCount; - u32 qCount = t->queueCount; - u32 qi = nfa->queueIndex; + const u32 aaCount = t->activeArrayCount; + const u32 qCount = t->queueCount; struct mq *q = &scratch->queues[qi]; const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + const struct NFA *nfa = getNfaByInfo(t, info); struct core_info *ci = &scratch->core_info; s64a loc = (s64a)end - ci->buf_offset; @@ -368,7 +365,6 @@ hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, } } - u32 top = tr->suffixEvent; assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); pushQueueSom(q, top, loc, som); @@ -748,14 +744,12 @@ char rosePrefixCheckMiracles(const struct RoseEngine *t, return 1; } -static rose_inline -char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, - u64a end, struct RoseContext *tctxt) { +static really_inline +char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, + ReportID leftfixReport, u64a end, + struct RoseContext *tctxt) { struct hs_scratch *scratch = tctxtToScratch(tctxt); struct core_info *ci = &scratch->core_info; - assert(tr->flags & ROSE_ROLE_FLAG_ROSE); - - u32 qi = tr->leftfixQueue; u32 ri = queueToLeftIndex(t, qi); const struct LeftNfaInfo *left = getLeftTable(t) + ri; @@ -763,9 +757,9 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", (left->transient ? "transient" : "active"), (left->infix ? "infix" : "prefix"), - ri, qi, tr->leftfixLag, left->maxLag); + ri, qi, leftfixLag, left->maxLag); - assert(tr->leftfixLag <= left->maxLag); + assert(leftfixLag <= left->maxLag); struct mq *q = scratch->queues + qi; u32 qCount = t->queueCount; @@ -776,7 +770,7 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, return 0; } - if (unlikely(end < tr->leftfixLag)) { + if (unlikely(end < leftfixLag)) { assert(0); /* lag is the literal length */ return 0; } @@ -816,9 +810,9 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, } } - s64a loc = (s64a)end - ci->buf_offset - tr->leftfixLag; + s64a loc = (s64a)end - ci->buf_offset - leftfixLag; assert(loc >= q_cur_loc(q)); - assert(tr->leftfixReport != MO_INVALID_IDX); + assert(leftfixReport != MO_INVALID_IDX); if (left->transient) { s64a start_loc = loc - left->transient; @@ -855,7 +849,7 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, pushQueueNoMerge(q, MQE_END, loc); - char rv = nfaQueueExecRose(q->nfa, q, tr->leftfixReport); + char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); if (!rv) { /* nfa is dead */ DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); @@ -869,12 +863,12 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, q->cur = q->end = 0; pushQueueAt(q, 0, MQE_START, loc); - DEBUG_PRINTF("checking for report %u\n", tr->leftfixReport); + DEBUG_PRINTF("checking for report %u\n", leftfixReport); DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); return rv == MO_MATCHES_PENDING; } else { - DEBUG_PRINTF("checking for report %u\n", tr->leftfixReport); - char rv = nfaInAcceptState(q->nfa, tr->leftfixReport, q); + 
DEBUG_PRINTF("checking for report %u\n", leftfixReport); + char rv = nfaInAcceptState(q->nfa, leftfixReport, q); DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); return rv; } @@ -882,136 +876,84 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, static rose_inline void roseSetRole(const struct RoseEngine *t, u8 *state, - struct RoseContext *tctxt, const struct RoseRole *tr) { - DEBUG_PRINTF("set role %u on: idx=%u, depth=%u, groups=0x%016llx\n", - (u32)(tr - getRoleTable(t)), - tr->stateIndex, tr->depth, tr->groups); - void *role_state = getRoleState(state); - - assert(tr < getRoleTable(t) + t->roleCount); - - int leafNode = !!(tr->stateIndex == MMB_INVALID); - - // If this role is a leaf node, it doesn't have a state index to switch - // on and it doesn't need any history stored or other work done. So we can - // bail. - /* may be a ghost role; still need to set groups */ - if (leafNode) { - tctxt->groups |= tr->groups; - DEBUG_PRINTF("role %u is a leaf node, no work to do.\n", - (u32)(tr - getRoleTable(t))); - return; - } - - // Switch this role on in the state bitvector, checking whether it was set - // already. - char alreadySet = mmbit_set(role_state, t->rolesWithStateCount, - tr->stateIndex); - - // Roles that we've already seen have had most of their bookkeeping done: - // all we need to do is update the offset table if this is an - // offset-tracking role. - if (alreadySet) { - DEBUG_PRINTF("role already set\n"); - return; - } - - // If this role's depth is greater than the current depth, update it - update_depth(tctxt, tr); - - // Switch on this role's groups - tctxt->groups |= tr->groups; + struct RoseContext *tctxt, u32 stateIndex, u8 depth) { + DEBUG_PRINTF("state idx=%u, depth=%u\n", stateIndex, depth); + mmbit_set(getRoleState(state), t->rolesWithStateCount, stateIndex); + update_depth(tctxt, depth); } static rose_inline -void roseTriggerInfixes(const struct RoseEngine *t, const struct RoseRole *tr, - u64a start, u64a end, struct RoseContext *tctxt) { +void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, + u32 topEvent, u8 cancel, struct RoseContext *tctxt) { struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + s64a loc = (s64a)end - ci->buf_offset; - DEBUG_PRINTF("infix time! 
@%llu\t(s%llu)\n", end, start); + u32 ri = queueToLeftIndex(t, qi); + assert(topEvent < MQE_INVALID); - assert(tr->infixTriggerOffset); + const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); + assert(!left->transient); - u32 qCount = t->queueCount; - u32 arCount = t->activeLeftCount; - struct fatbit *aqa = tctxtToScratch(tctxt)->aqa; - u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state); - s64a loc = (s64a)end - ci->buf_offset; + DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); - const struct RoseTrigger *curr_r = (const struct RoseTrigger *) - ((const char *)t + tr->infixTriggerOffset); - assert(ISALIGNED_N(curr_r, alignof(struct RoseTrigger))); - assert(curr_r->queue != MO_INVALID_IDX); /* shouldn't be here if no - * triggers */ - do { - u32 qi = curr_r->queue; - u32 ri = queueToLeftIndex(t, qi); - u32 topEvent = curr_r->event; - u8 cancel = curr_r->cancel_prev_top; - assert(topEvent < MQE_INVALID); + struct mq *q = tctxtToScratch(tctxt)->queues + qi; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); - assert(!left->transient); + u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state); + const u32 arCount = t->activeLeftCount; + char alive = mmbit_set(activeLeftArray, arCount, ri); - DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); + if (alive && info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + return; + } - struct mq *q = tctxtToScratch(tctxt)->queues + qi; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + struct fatbit *aqa = tctxtToScratch(tctxt)->aqa; + const u32 qCount = t->queueCount; - char alive = mmbit_set(activeLeftArray, arCount, ri); + if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && + !fatbit_isset(aqa, qCount, qi) && isZombie(t, tctxt->state, left)) { + DEBUG_PRINTF("yawn - zombie\n"); + return; + } - if (alive && info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - goto next_infix; + if (cancel) { + DEBUG_PRINTF("dominating top: (re)init\n"); + fatbit_set(aqa, qCount, qi); + initRoseQueue(t, qi, left, tctxt); + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (!fatbit_set(aqa, qCount, qi)) { + DEBUG_PRINTF("initing %u\n", qi); + initRoseQueue(t, qi, left, tctxt); + if (alive) { + s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left); + pushQueueAt(q, 0, MQE_START, sp); + loadStreamState(q->nfa, q, sp); + } else { + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); } + } else if (!alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (isQueueFull(q)) { + reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset - && !fatbit_isset(aqa, qCount, qi) - && isZombie(t, tctxt->state, left)) { - DEBUG_PRINTF("yawn - zombie\n"); - goto next_infix; - } + if (isQueueFull(q)) { + /* still full - reduceQueue did nothing */ + DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, + q->end - q->cur); + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - if (cancel) { - DEBUG_PRINTF("dominating top: (re)init\n"); - fatbit_set(aqa, qCount, qi); - initRoseQueue(t, qi, left, tctxt); - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (!fatbit_set(aqa, qCount, qi)) { - DEBUG_PRINTF("initing %u\n", qi); - initRoseQueue(t, qi, left, tctxt); - if (alive) { - s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left); 
- pushQueueAt(q, 0, MQE_START, sp); - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } - } else if (!alive) { q->cur = q->end = 0; pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (isQueueFull(q)) { - reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - - if (isQueueFull(q)) { - /* still full - reduceQueue did nothing */ - DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", - qi, q->end - q->cur); - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } } + } - pushQueueSom(q, topEvent, loc, start); - next_infix: - ++curr_r; - } while (curr_r->queue != MO_INVALID_IDX); + pushQueueSom(q, topEvent, loc, start); } static really_inline @@ -1024,10 +966,11 @@ int reachHasBit(const u8 *reach, u8 c) { * are satisfied. */ static rose_inline -int roseCheckLookaround(const struct RoseEngine *t, const struct RoseRole *tr, - u64a end, struct RoseContext *tctxt) { - assert(tr->lookaroundIndex != MO_INVALID_IDX); - assert(tr->lookaroundCount > 0); +int roseCheckLookaround(const struct RoseEngine *t, u32 lookaroundIndex, + u32 lookaroundCount, u64a end, + struct RoseContext *tctxt) { + assert(lookaroundIndex != MO_INVALID_IDX); + assert(lookaroundCount > 0); const struct core_info *ci = &tctxtToScratch(tctxt)->core_info; DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, @@ -1035,12 +978,12 @@ int roseCheckLookaround(const struct RoseEngine *t, const struct RoseRole *tr, const u8 *base = (const u8 *)t; const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const s8 *look = look_base + tr->lookaroundIndex; - const s8 *look_end = look + tr->lookaroundCount; + const s8 *look = look_base + lookaroundIndex; + const s8 *look_end = look + lookaroundCount; assert(look < look_end); const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + tr->lookaroundIndex * REACH_BITVECTOR_LEN; + const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; // The following code assumes that the lookaround structures are ordered by // increasing offset. @@ -1113,38 +1056,6 @@ int roseCheckLookaround(const struct RoseEngine *t, const struct RoseRole *tr, return 1; } -static rose_inline -int roseCheckRolePreconditions(const struct RoseEngine *t, - const struct RoseRole *tr, u64a end, - struct RoseContext *tctxt) { - // If this role can only match at end-of-block, then check that it's so. 
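/* roseCheckLookaround() above walks (relative offset, reach bitvector)
 * pairs stored in increasing offset order and succeeds only if the byte
 * at every offset falls inside its 256-bit character class. A reduced
 * sketch of that walk, assuming every offset lands in the current
 * buffer; the real code also handles bytes in the history buffer and at
 * the buffer boundary. */
#include <stddef.h>

static int lookaround_ok(const unsigned char *buf, size_t buf_len,
                         size_t end, const signed char *look,
                         const unsigned char *reach, unsigned count) {
    for (unsigned i = 0; i < count; i++, reach += 256 / 8) {
        long long pos = (long long)end + look[i];
        if (pos < 0 || (unsigned long long)pos >= buf_len) {
            return 0; /* out of range: simplified failure in this sketch */
        }
        unsigned char c = buf[pos];
        if (!(reach[c / 8] & (1U << (c % 8)))) {
            return 0; /* byte not in this entry's character class */
        }
    }
    return 1;
}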
- if (tr->flags & ROSE_ROLE_FLAG_ONLY_AT_END) { - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - if (end != ci->buf_offset + ci->len) { - DEBUG_PRINTF("role %u should only match at end of data, skipping\n", - (u32)(tr - getRoleTable(t))); - return 0; - } - } - - if (tr->lookaroundIndex != MO_INVALID_IDX) { - if (!roseCheckLookaround(t, tr, end, tctxt)) { - DEBUG_PRINTF("failed lookaround check\n"); - return 0; - } - } - - assert(!tr->leftfixQueue || (tr->flags & ROSE_ROLE_FLAG_ROSE)); - if (tr->flags & ROSE_ROLE_FLAG_ROSE) { - if (!roseTestLeftfix(t, tr, end, tctxt)) { - DEBUG_PRINTF("failed leftfix check\n"); - return 0; - } - } - - return 1; -} - static int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, void *context) { @@ -1154,20 +1065,18 @@ int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, } static rose_inline -u64a roseGetHaigSom(const struct RoseEngine *t, const struct RoseRole *tr, - UNUSED u64a end, struct RoseContext *tctxt) { - assert(tr->flags & ROSE_ROLE_FLAG_ROSE); - - u32 qi = tr->leftfixQueue; +u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, + UNUSED const u32 leftfixLag, + struct RoseContext *tctxt) { u32 ri = queueToLeftIndex(t, qi); UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", left->transient ? "transient" : "active", ri, qi, - tr->leftfixLag, left->maxLag); + leftfixLag, left->maxLag); - assert(tr->leftfixLag <= left->maxLag); + assert(leftfixLag <= left->maxLag); struct mq *q = tctxtToScratch(tctxt)->queues + qi; @@ -1186,98 +1095,217 @@ u64a roseGetHaigSom(const struct RoseEngine *t, const struct RoseRole *tr, return start; } -static really_inline -hwlmcb_rv_t roseHandleRoleEffects(const struct RoseEngine *t, - const struct RoseRole *tr, u64a end, - struct RoseContext *tctxt, char in_anchored, - int *work_done) { - u64a som = 0ULL; - if (tr->flags & ROSE_ROLE_FLAG_SOM_ADJUST) { - som = end - tr->somAdjust; - DEBUG_PRINTF("som requested som %llu = %llu - %u\n", som, end, - tr->somAdjust); - } else if (tr->flags & ROSE_ROLE_FLAG_SOM_ROSEFIX) { - som = roseGetHaigSom(t, tr, end, tctxt); - DEBUG_PRINTF("som from rosefix %llu\n", som); - } - - if (tr->infixTriggerOffset) { - roseTriggerInfixes(t, tr, som, end, tctxt); - tctxt->groups |= tr->groups; /* groups may have been cleared by infix - * going quiet before */ - } - - if (tr->suffixOffset) { - hwlmcb_rv_t rv = roseHandleSuffixTrigger(t, tr, som, end, tctxt, - in_anchored); - if (rv != HWLM_CONTINUE_MATCHING) { - return rv; - } +static rose_inline +char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { + assert(max_bound <= ROSE_BOUND_INF); + assert(min_bound <= max_bound); + + if (end < min_bound) { + return 0; } + return max_bound == ROSE_BOUND_INF || end <= max_bound; +} - if (tr->reportId != MO_INVALID_IDX) { - hwlmcb_rv_t rv; - if (tr->flags & ROSE_ROLE_FLAG_REPORT_START) { - /* rose role knows its start offset */ - assert(tr->flags & ROSE_ROLE_FLAG_SOM_ROSEFIX); - assert(!(tr->flags & ROSE_ROLE_FLAG_CHAIN_REPORT)); - if (tr->flags & ROSE_ROLE_FLAG_SOM_REPORT) { - rv = roseHandleSomSom(t, tctxt->state, tr->reportId, som, end, - tctxt, in_anchored); - } else { - rv = roseHandleSomMatch(t, tctxt->state, tr->reportId, som, end, - tctxt, in_anchored); +#define PROGRAM_CASE(name) \ + case ROSE_ROLE_INSTR_##name: { \ + DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_ROLE_INSTR_##name); \ + const struct ROSE_ROLE_STRUCT_##name *ri = \ + 
(const struct ROSE_ROLE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + +static really_inline +hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, + u64a end, u64a *som, struct RoseContext *tctxt, + char in_anchored, int *work_done) { + assert(programOffset); + + DEBUG_PRINTF("program begins at offset %u\n", programOffset); + + const char *pc = getByOffset(t, programOffset); + + assert(*(const u8 *)pc != ROSE_ROLE_INSTR_END); + + for (;;) { + assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); + u8 code = *(const u8 *)pc; + assert(code <= ROSE_ROLE_INSTR_END); + + switch ((enum RoseRoleInstructionCode)code) { + PROGRAM_CASE(ANCHORED_DELAY) { + if (in_anchored && end > t->floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("delay until playback\n"); + update_depth(tctxt, ri->depth); + tctxt->groups |= ri->groups; + *work_done = 1; + pc += ri->done_jump; + continue; + } } - } else { - if (tr->flags & ROSE_ROLE_FLAG_SOM_REPORT) { - /* do som management */ - rv = roseHandleSom(t, tctxt->state, tr->reportId, end, tctxt, - in_anchored); - } else if (tr->flags & ROSE_ROLE_FLAG_CHAIN_REPORT) { - rv = roseCatchUpAndHandleChainMatch(t, tctxt->state, - tr->reportId, end, tctxt, - in_anchored); - } else { - rv = roseHandleMatch(t, tctxt->state, tr->reportId, end, tctxt, - in_anchored); + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ONLY_EOD) { + struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + if (end != ci->buf_offset + ci->len) { + DEBUG_PRINTF("should only match at end of data\n"); + pc += ri->fail_jump; + continue; + } } - } + PROGRAM_NEXT_INSTRUCTION - if (rv != HWLM_CONTINUE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } + PROGRAM_CASE(CHECK_ROOT_BOUNDS) { + if (!in_anchored && + !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) { + DEBUG_PRINTF("failed root bounds check\n"); + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION - roseSetRole(t, tctxt->state, tctxt, tr); + PROGRAM_CASE(CHECK_LOOKAROUND) { + if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) { + DEBUG_PRINTF("failed lookaround check\n"); + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION - *work_done = 1; + PROGRAM_CASE(CHECK_LEFTFIX) { + if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end, + tctxt)) { + DEBUG_PRINTF("failed lookaround check\n"); + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION - return HWLM_CONTINUE_MATCHING; -} + PROGRAM_CASE(SOM_ADJUST) { + assert(ri->distance <= end); + *som = end - ri->distance; + DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, *som); + } + PROGRAM_NEXT_INSTRUCTION -static really_inline -hwlmcb_rv_t roseHandleRole(const struct RoseEngine *t, - const struct RoseRole *tr, u64a end, - struct RoseContext *tctxt, char in_anchored, - int *work_done) { - DEBUG_PRINTF("hi role %zd (flags %08x)\n", tr - getRoleTable(t), - tr->flags); - if (in_anchored && end > t->floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("delay until playback, just do groups/depth now\n"); - update_depth(tctxt, tr); - tctxt->groups |= tr->groups; - *work_done = 1; - return HWLM_CONTINUE_MATCHING; - } + PROGRAM_CASE(SOM_LEFTFIX) { + *som = roseGetHaigSom(t, ri->queue, ri->lag, tctxt); + DEBUG_PRINTF("som from leftfix is %llu\n", *som); + } + PROGRAM_NEXT_INSTRUCTION - if (!roseCheckRolePreconditions(t, tr, end, tctxt)) { - return HWLM_CONTINUE_MATCHING; + PROGRAM_CASE(TRIGGER_INFIX) { + roseTriggerInfix(t, *som, end, 
ri->queue, ri->event, ri->cancel, + tctxt); + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_SUFFIX) { + if (roseHandleSuffixTrigger(t, ri->queue, ri->event, *som, end, + tctxt, in_anchored) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + if (roseHandleMatch(t, tctxt->state, ri->report, end, tctxt, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_CHAIN) { + if (roseCatchUpAndHandleChainMatch(t, tctxt->state, ri->report, + end, tctxt, in_anchored) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EOD) { + if (tctxt->cb(end, ri->report, tctxt->userCtx) == + MO_HALT_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_INT) { + if (roseHandleSom(t, tctxt->state, ri->report, end, tctxt, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM) { + if (roseHandleSomSom(t, tctxt->state, ri->report, *som, end, + tctxt, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_KNOWN) { + if (roseHandleSomMatch(t, tctxt->state, ri->report, *som, end, + tctxt, in_anchored) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_STATE) { + roseSetRole(t, tctxt->state, tctxt, ri->index, ri->depth); + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_GROUPS) { + tctxt->groups |= ri->groups; + DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(END) { + DEBUG_PRINTF("finished\n"); + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION + } } - /* We now know the role has matched. We can now trigger things that need to - * be triggered and record things that need to be recorded.*/ + assert(0); // unreachable + return HWLM_CONTINUE_MATCHING; +} + +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION - return roseHandleRoleEffects(t, tr, end, tctxt, in_anchored, work_done); +hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset, + u64a end, u64a *som, struct RoseContext *tctxt, + int *work_done) { + return roseRunRoleProgram_i(t, programOffset, end, som, tctxt, 0, + work_done); } static really_inline @@ -1364,9 +1392,12 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, /* mark role as handled so we don't touch it again in this walk */ fatbit_set(handled_roles, t->roleCount, role); - hwlmcb_rv_t rv = roseHandleRole(t, tr, end, tctxt, - 0 /* in_anchored */, &work_done); - if (rv == HWLM_TERMINATE_MATCHING) { + if (!tr->programOffset) { + continue; + } + u64a som = 0ULL; + if (roseRunRoleProgram_i(t, tr->programOffset, end, &som, tctxt, 0, + &work_done) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } @@ -1381,51 +1412,26 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } -// Check that the predecessor bounds are satisfied for a root role with special -// requirements (anchored, or unanchored but with preceding dots). 
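/* A stripped-down model of the dispatch shape used by
 * roseRunRoleProgram_i() above: a program is a byte stream of fixed-size
 * instruction structs, each beginning with a one-byte opcode, and checks
 * that fail advance pc by a relative fail_jump instead of falling
 * through. The two-instruction set below is invented for illustration;
 * the real opcodes are the ROSE_ROLE_INSTR_* values in rose_program.h,
 * and the real loop rounds each advance up to ROSE_INSTR_MIN_ALIGN. */
#include <stdint.h>

enum { SKETCH_CHECK_MIN_OFFSET, SKETCH_END };

struct sketch_check_min_offset {
    uint8_t code;       /* SKETCH_CHECK_MIN_OFFSET */
    uint32_t min_bound; /* fail if end < min_bound */
    uint32_t fail_jump; /* relative byte offset taken on failure */
};

struct sketch_end {
    uint8_t code; /* SKETCH_END */
};

static int sketch_run_program(const char *pc, uint64_t end) {
    for (;;) {
        switch (*(const uint8_t *)pc) {
        case SKETCH_CHECK_MIN_OFFSET: {
            const struct sketch_check_min_offset *ri =
                (const struct sketch_check_min_offset *)pc;
            if (end < ri->min_bound) {
                pc += ri->fail_jump; /* skip the dependent instructions */
                continue;
            }
            pc += sizeof(*ri);
            continue;
        }
        case SKETCH_END:
            return 1; /* program ran to completion */
        }
        return 0; /* unknown opcode: malformed program */
    }
}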
-static rose_inline -char roseCheckRootBounds(const struct RoseEngine *t, const struct RoseRole *tr, - u64a end) { - assert(tr->predOffset != ROSE_OFFSET_INVALID); - const struct RosePred *tp = getPredTable(t) + tr->predOffset; - assert(tp->role == MO_INVALID_IDX); - - // Check history. We only use a subset of our history types for root or - // anchored root roles. - assert(tp->historyCheck == ROSE_ROLE_HISTORY_NONE || - tp->historyCheck == ROSE_ROLE_HISTORY_ANCH); - - return roseCheckPredHistory(tp, end); -} - // Walk the set of root roles (roles with depth 1) associated with this literal // and set them on. static really_inline char roseWalkRootRoles_i(const struct RoseEngine *t, const struct RoseLiteral *tl, u64a end, struct RoseContext *tctxt, char in_anchored) { - /* main entry point ensures that there is at least two root roles */ - int work_done = 0; - - assert(tl->rootRoleOffset + tl->rootRoleCount <= t->rootRoleCount); - assert(tl->rootRoleCount > 1); + if (!tl->rootProgramOffset) { + return 1; + } - const u32 *rootRole = getRootRoleTable(t) + tl->rootRoleOffset; - const u32 *rootRoleEnd = rootRole + tl->rootRoleCount; - for (; rootRole < rootRoleEnd; rootRole++) { - u32 role_offset = *rootRole; - const struct RoseRole *tr = getRoleByOffset(t, role_offset); + DEBUG_PRINTF("running literal root program at %u\n", tl->rootProgramOffset); - if (!in_anchored && (tr->flags & ROSE_ROLE_PRED_ROOT) - && !roseCheckRootBounds(t, tr, end)) { - continue; - } + u64a som = 0; + int work_done = 0; - if (roseHandleRole(t, tr, end, tctxt, in_anchored, &work_done) - == HWLM_TERMINATE_MATCHING) { - return 0; - } - }; + if (roseRunRoleProgram_i(t, tl->rootProgramOffset, end, &som, tctxt, + in_anchored, + &work_done) == HWLM_TERMINATE_MATCHING) { + return 0; + } // If we've actually handled any roles, we might need to apply this // literal's squash mask to our groups as well. @@ -1450,73 +1456,20 @@ char roseWalkRootRoles_N(const struct RoseEngine *t, return roseWalkRootRoles_i(t, tl, end, tctxt, 0); } -static really_inline -char roseWalkRootRoles_i1(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt, char in_anchored) { - /* main entry point ensures that there is exactly one root role */ - int work_done = 0; - u32 role_offset = tl->rootRoleOffset; - const struct RoseRole *tr = getRoleByOffset(t, role_offset); - - if (!in_anchored && (tr->flags & ROSE_ROLE_PRED_ROOT) - && !roseCheckRootBounds(t, tr, end)) { - return 1; - } - - hwlmcb_rv_t rv = roseHandleRole(t, tr, end, tctxt, in_anchored, &work_done); - if (rv == HWLM_TERMINATE_MATCHING) { - return 0; - } - - // If we've actually handled any roles, we might need to apply this - // literal's squash mask to our groups as well. 
- if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - - return 1; -} - -static never_inline -char roseWalkRootRoles_A1(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - return roseWalkRootRoles_i1(t, tl, end, tctxt, 1); -} - -static never_inline -char roseWalkRootRoles_N1(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - return roseWalkRootRoles_i1(t, tl, end, tctxt, 0); -} - - static really_inline char roseWalkRootRoles(const struct RoseEngine *t, const struct RoseLiteral *tl, u64a end, struct RoseContext *tctxt, char in_anchored, char in_anch_playback) { - DEBUG_PRINTF("literal has %u root roles\n", tl->rootRoleCount); - - assert(!in_anch_playback || tl->rootRoleCount); - if (!in_anch_playback && !tl->rootRoleCount) { + assert(!in_anch_playback || tl->rootProgramOffset); + if (!in_anch_playback && !tl->rootProgramOffset) { return 1; } if (in_anchored) { - if (tl->rootRoleCount == 1) { - return roseWalkRootRoles_A1(t, tl, end, tctxt); - } else { - return roseWalkRootRoles_A(t, tl, end, tctxt); - } + return roseWalkRootRoles_A(t, tl, end, tctxt); } else { - if (tl->rootRoleCount == 1) { - return roseWalkRootRoles_N1(t, tl, end, tctxt); - } else { - return roseWalkRootRoles_N(t, tl, end, tctxt); - } + return roseWalkRootRoles_N(t, tl, end, tctxt); } } @@ -1617,12 +1570,11 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - assert(tl->rootRoleCount > 0); + assert(tl->rootProgramOffset); assert(!tl->delay_mask); - DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx, " - "rootRoleCount=%u\n", - id, tl->minDepth, tl->groups, tl->rootRoleCount); + DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx\n", id, + tl->minDepth, tl->groups); if (real_end <= t->floatingMinLiteralMatchOffset) { roseFlushLastByteHistory(t, state, real_end, tctxt); @@ -1688,8 +1640,8 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - DEBUG_PRINTF("lit id=%u, minDepth=%u, groups=0x%016llx, rootRoleCount=%u\n", - id, tl->minDepth, tl->groups, tl->rootRoleCount); + DEBUG_PRINTF("lit id=%u, minDepth=%u, groups=0x%016llx\n", id, tl->minDepth, + tl->groups); if (do_group_check && !(tl->groups & tctxt->groups)) { DEBUG_PRINTF("IGNORE: none of this literal's groups are set.\n"); diff --git a/src/rose/match.h b/src/rose/match.h index 19365f01a..a39bebf37 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -262,8 +262,8 @@ hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, struct RoseContext *tctxt } static really_inline -void update_depth(struct RoseContext *tctxt, const struct RoseRole *tr) { - u8 d = MAX(tctxt->depth, tr->depth + 1); +void update_depth(struct RoseContext *tctxt, u8 depth) { + u8 d = MAX(tctxt->depth, depth + 1); assert(d >= tctxt->depth); DEBUG_PRINTF("depth now %hhu was %hhu\n", d, tctxt->depth); tctxt->depth = d; @@ -323,4 +323,8 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, scratch->sparse_iter_state); } +hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset, + u64a end, u64a *som, struct RoseContext *tctxt, + int *work_done); + #endif diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 3b8949e4c..978d413db 100644 --- a/src/rose/rose_build_bytecode.cpp +++ 
b/src/rose/rose_build_bytecode.cpp @@ -38,6 +38,7 @@ #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" +#include "rose_program.h" #include "hwlm/hwlm.h" /* engine types */ #include "hwlm/hwlm_build.h" #include "nfa/castlecompile.h" @@ -116,61 +117,41 @@ namespace /* anon */ { // Orders RoseEdge edges by the state index of the source node struct EdgeSourceStateCompare { - EdgeSourceStateCompare(const RoseGraph &g_, - const vector &roleTable_) : - g(g_), roleTable(roleTable_) {} - bool operator()(const RoseEdge &a, const RoseEdge &b) const { - u32 arole = g[source(a, g)].role; - u32 brole = g[source(b, g)].role; - if (arole >= roleTable.size()) { - DEBUG_PRINTF("bad arole %u (idx=%zu)\n", arole, g[source(a, g)].idx); - } - if (brole >= roleTable.size()) { - DEBUG_PRINTF("bad brole %u (idx=%zu)\n", brole, g[source(b, g)].idx); + EdgeSourceStateCompare( + const RoseGraph &g_, + const ue2::unordered_map &roleStateIndices_) + : g(g_), roleStateIndices(roleStateIndices_) {} + + u32 state_index(RoseVertex v) const { + auto it = roleStateIndices.find(v); + if (it != roleStateIndices.end()) { + return it->second; } - assert(arole < roleTable.size()); - assert(brole < roleTable.size()); - return roleTable.at(arole).stateIndex < roleTable.at(brole).stateIndex; + return MMB_INVALID; } - const RoseGraph &g; - const vector &roleTable; -}; -struct RoseTriggerOrdering { - RoseTriggerOrdering() {} - bool operator()(const RoseTrigger &a, const RoseTrigger &b) const { - ORDER_CHECK(queue); - ORDER_CHECK(event); - ORDER_CHECK(cancel_prev_top); - return false; - } -}; -struct RoseTriggerEquality { - RoseTriggerEquality() {} - bool operator()(const RoseTrigger &a, const RoseTrigger &b) const { - return a.queue == b.queue - && a.event == b.event - && a.cancel_prev_top == b.cancel_prev_top; + bool operator()(const RoseEdge &a, const RoseEdge &b) const { + return state_index(source(a, g)) < state_index(source(b, g)); } + + const RoseGraph &g; + const ue2::unordered_map &roleStateIndices; }; struct left_build_info { // Constructor for an engine implementation. - left_build_info(NFA *n, u32 q, u32 l, u32 t, rose_group sm, + left_build_info(u32 q, u32 l, u32 t, rose_group sm, const std::vector &stops, u32 max_ql, u8 cm_count, const CharReach &cm_cr) - : nfa(n), queue(q), lag(l), transient(t), squash_mask(sm), - stopAlphabet(stops), max_queuelen(max_ql), - countingMiracleCount(cm_count), countingMiracleReach(cm_cr) { - assert(n); - } + : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops), + max_queuelen(max_ql), countingMiracleCount(cm_count), + countingMiracleReach(cm_cr) {} // Constructor for a lookaround implementation. explicit left_build_info(const vector &look) : has_lookaround(true), lookaround(look) {} - NFA *nfa = nullptr; /* uniquely idents the left_build_info */ - u32 queue = 0; /* also uniquely idents the left_build_info */ + u32 queue = 0; /* uniquely idents the left_build_info */ u32 lag = 0; u32 transient = 0; rose_group squash_mask = ~rose_group{0}; @@ -183,6 +164,104 @@ struct left_build_info { vector lookaround; // alternative implementation to the NFA }; +/** \brief Role instruction model used at compile time. 
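 *
 * Each instruction is held as a union of the ROSE_ROLE_STRUCT_* types,
 * all of which begin with a one-byte code; code(), get() and length()
 * rely on that common initial member to select the active variant, and
 * operator< / operator== compare the raw union bytes so instructions
 * can be sorted and de-duplicated when programs are laid out.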
*/ +class RoleInstruction { +public: + RoleInstruction() { + memset(&u, 0, sizeof(u)); + u.end.code = ROSE_ROLE_INSTR_END; + } + + explicit RoleInstruction(enum RoseRoleInstructionCode c) { + memset(&u, 0, sizeof(u)); + u.end.code = c; + } + + bool operator<(const RoleInstruction &a) const { + return memcmp(&u, &a.u, sizeof(u)) < 0; + } + + bool operator==(const RoleInstruction &a) const { + return memcmp(&u, &a.u, sizeof(u)) == 0; + } + + enum RoseRoleInstructionCode code() const { + // Note that this sort of type-punning (relying on identical initial + // layout) is explicitly allowed by the C++11 standard. + return (enum RoseRoleInstructionCode)u.end.code; + } + + const void *get() const { + switch (code()) { + case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; + case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: return &u.checkRootBounds; + case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; + case ROSE_ROLE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; + case ROSE_ROLE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; + case ROSE_ROLE_INSTR_SOM_ADJUST: return &u.somAdjust; + case ROSE_ROLE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; + case ROSE_ROLE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; + case ROSE_ROLE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; + case ROSE_ROLE_INSTR_REPORT: return &u.report; + case ROSE_ROLE_INSTR_REPORT_CHAIN: return &u.reportChain; + case ROSE_ROLE_INSTR_REPORT_EOD: return &u.reportEod; + case ROSE_ROLE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; + case ROSE_ROLE_INSTR_REPORT_SOM: return &u.reportSom; + case ROSE_ROLE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; + case ROSE_ROLE_INSTR_SET_STATE: return &u.setState; + case ROSE_ROLE_INSTR_SET_GROUPS: return &u.setGroups; + case ROSE_ROLE_INSTR_END: return &u.end; + } + assert(0); + return &u.end; + } + + size_t length() const { + switch (code()) { + case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); + case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: return sizeof(u.checkRootBounds); + case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); + case ROSE_ROLE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); + case ROSE_ROLE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); + case ROSE_ROLE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); + case ROSE_ROLE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); + case ROSE_ROLE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); + case ROSE_ROLE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); + case ROSE_ROLE_INSTR_REPORT: return sizeof(u.report); + case ROSE_ROLE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); + case ROSE_ROLE_INSTR_REPORT_EOD: return sizeof(u.reportEod); + case ROSE_ROLE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); + case ROSE_ROLE_INSTR_REPORT_SOM: return sizeof(u.reportSom); + case ROSE_ROLE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); + case ROSE_ROLE_INSTR_SET_STATE: return sizeof(u.setState); + case ROSE_ROLE_INSTR_SET_GROUPS: return sizeof(u.setGroups); + case ROSE_ROLE_INSTR_END: return sizeof(u.end); + } + return 0; + } + + union { + ROSE_ROLE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; + ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS checkRootBounds; + ROSE_ROLE_STRUCT_CHECK_LOOKAROUND checkLookaround; + ROSE_ROLE_STRUCT_CHECK_LEFTFIX checkLeftfix; + ROSE_ROLE_STRUCT_ANCHORED_DELAY anchoredDelay; + ROSE_ROLE_STRUCT_SOM_ADJUST somAdjust; + ROSE_ROLE_STRUCT_SOM_LEFTFIX somLeftfix; + ROSE_ROLE_STRUCT_TRIGGER_INFIX triggerInfix; + ROSE_ROLE_STRUCT_TRIGGER_SUFFIX triggerSuffix; + 
ROSE_ROLE_STRUCT_REPORT report; + ROSE_ROLE_STRUCT_REPORT_CHAIN reportChain; + ROSE_ROLE_STRUCT_REPORT_EOD reportEod; + ROSE_ROLE_STRUCT_REPORT_SOM_INT reportSomInt; + ROSE_ROLE_STRUCT_REPORT_SOM reportSom; + ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; + ROSE_ROLE_STRUCT_SET_STATE setState; + ROSE_ROLE_STRUCT_SET_GROUPS setGroups; + ROSE_ROLE_STRUCT_END end; + } u; +}; + struct build_context : boost::noncopyable { /** \brief Rose Role information. * These entries are filled in by a number of functions as other tables are @@ -190,6 +269,9 @@ struct build_context : boost::noncopyable { */ vector roleTable; + /** \brief Role program mapping, keyed by index in roleTable. */ + vector> rolePrograms; + /** \brief minimum depth in number of hops from root/anchored root. */ map depths; @@ -215,15 +297,110 @@ struct build_context : boost::noncopyable { /** \brief Map from literal final ID to a set of non-root role IDs. */ ue2::unordered_map> litNonRootRoles; - /* contents of rose immediately following the RoseEngine. */ - vector engine_blob; + /** \brief State indices, for those roles that have them. */ + ue2::unordered_map roleStateIndices; + + /** \brief Mapping from queue index to bytecode offset for built engines + * that have already been pushed into the engine_blob. */ + ue2::unordered_map engineOffsets; + + /** \brief Contents of the Rose bytecode immediately following the + * RoseEngine. */ + vector> engine_blob; - /* base offset of engine_blob in the bytecode */ - const u32 engine_blob_base = ROUNDUP_16(sizeof(RoseEngine)); + /** \brief Base offset of engine_blob in the Rose engine bytecode. */ + static constexpr u32 engine_blob_base = ROUNDUP_CL(sizeof(RoseEngine)); }; } +static +void pad_engine_blob(build_context &bc, size_t align) { + assert(ISALIGNED_N(bc.engine_blob_base, align)); + size_t s = bc.engine_blob.size(); + + if (ISALIGNED_N(s, align)) { + return; + } + + bc.engine_blob.resize(s + align - s % align); +} + +static +u32 add_to_engine_blob(build_context &bc, const void *a, const size_t len, + const size_t align) { + pad_engine_blob(bc, align); + + size_t rv = bc.engine_blob_base + bc.engine_blob.size(); + assert(rv >= bc.engine_blob_base); + DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv); + + assert(ISALIGNED_N(bc.engine_blob.size(), align)); + + bc.engine_blob.resize(bc.engine_blob.size() + len); + memcpy(&bc.engine_blob.back() - len + 1, a, len); + + return verify_u32(rv); +} + +template +static +u32 add_to_engine_blob(build_context &bc, const T &a) { + static_assert(is_pod::value, "should be pod"); + return add_to_engine_blob(bc, &a, sizeof(a), alignof(a)); +} + +template +static +u32 add_to_engine_blob(build_context &bc, const T &a, const size_t len) { + static_assert(is_pod::value, "should be pod"); + return add_to_engine_blob(bc, &a, len, alignof(a)); +} + +template +static +u32 add_to_engine_blob(build_context &bc, Iter b, const Iter &e) { + using value_type = typename Iter::value_type; + static_assert(is_pod::value, "should be pod"); + + if (b == e) { + return 0; + } + + u32 offset = add_to_engine_blob(bc, *b); + for (++b; b != e; ++b) { + add_to_engine_blob(bc, *b); + } + + return offset; +} + +static +const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { + assert(contains(bc.engineOffsets, qi)); + u32 nfa_offset = bc.engineOffsets.at(qi); + assert(nfa_offset >= bc.engine_blob_base); + const NFA *n = (const NFA *)(bc.engine_blob.data() + nfa_offset - + bc.engine_blob_base); + assert(n->queueIndex == qi); + return n; +} + +static +const NFA 
*add_nfa_to_blob(build_context &bc, NFA &nfa) { + u32 qi = nfa.queueIndex; + u32 nfa_offset = add_to_engine_blob(bc, nfa, nfa.length); + DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi, + nfa.type, nfa.length, nfa_offset); + + assert(!contains(bc.engineOffsets, qi)); + bc.engineOffsets.emplace(qi, nfa_offset); + + const NFA *n = get_nfa_from_blob(bc, qi); + assert(memcmp(&nfa, n, nfa.length) == 0); + return n; +} + /* vertex ordered by their role index */ static vector get_ordered_verts(const RoseGraph &g) { @@ -767,15 +944,13 @@ void setLeftNfaProperties(NFA &n, const left_id &left) { } static -bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, - vector> *built_out, - set *no_retrigger_queues, - map *leftfix_info, +bool buildLeftfixes(const RoseBuildImpl &tbi, build_context &bc, + QueueIndexFactory &qif, set *no_retrigger_queues, bool do_prefix) { const RoseGraph &g = tbi.g; const CompileContext &cc = tbi.cc; - ue2::unordered_map seen; + ue2::unordered_map seen; // already built queue indices map > infixTriggers; findInfixTriggers(tbi, &infixTriggers); @@ -798,7 +973,6 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, // our in-edges. assert(roseHasTops(g, v)); - NFA *n; u32 qi; // queue index, set below. u32 lag = g[v].left.lag; bool is_transient = contains(tbi.transient, leftfix); @@ -807,16 +981,15 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, vector lookaround; if (makeLeftfixLookaround(tbi, v, lookaround)) { DEBUG_PRINTF("implementing as lookaround!\n"); - leftfix_info->emplace(v, left_build_info(lookaround)); + bc.leftfix_info.emplace(v, left_build_info(lookaround)); continue; } } if (contains(seen, leftfix)) { // NFA already built. - n = seen[leftfix]; - qi = n->queueIndex; - assert(qi < built_out->size()); + qi = seen[leftfix]; + assert(contains(bc.engineOffsets, qi)); DEBUG_PRINTF("sharing leftfix, qi=%u\n", qi); } else { DEBUG_PRINTF("making %sleftfix\n", is_transient ? "transient " : ""); @@ -841,7 +1014,6 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, setLeftNfaProperties(*nfa, leftfix); qi = qif.get_queue(); - assert(qi == built_out->size()); nfa->queueIndex = qi; if (!is_prefix && !leftfix.haig() && leftfix.graph() && @@ -850,10 +1022,9 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, no_retrigger_queues->insert(qi); } - n = nfa.get(); - seen.insert(make_pair(leftfix, n)); DEBUG_PRINTF("built leftfix, qi=%u\n", qi); - built_out->push_back(move(nfa)); + add_nfa_to_blob(bc, *nfa); + seen.emplace(leftfix, qi); } rose_group squash_mask = tbi.rose_squash_masks.at(leftfix); @@ -899,10 +1070,9 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr); } - leftfix_info->insert( - make_pair(v, left_build_info(n, qi, lag, max_width, - squash_mask, stop, max_queuelen, - cm_count, cm_cr))); + bc.leftfix_info.emplace( + v, left_build_info(qi, lag, max_width, squash_mask, stop, + max_queuelen, cm_count, cm_cr)); } return true; @@ -934,7 +1104,6 @@ bool hasNonSmallBlockOutfix(const vector &outfixes) { static aligned_unique_ptr buildOutfix(RoseBuildImpl &tbi, OutfixInfo &outfix) { assert(!outfix.is_dead()); // should not be marked dead. - assert(!outfix.nfa); // should not be already built. 
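/* A minimal standalone sketch (simplified, not the real helpers) of the
 * alignment discipline that pad_engine_blob()/add_to_engine_blob() above
 * implement: pad to the requested alignment, append the raw bytes, and
 * return the offset relative to the start of the RoseEngine:
 *
 *   u32 blob_append(vector<char> &blob, u32 base, const void *src,
 *                   size_t len, size_t align) {
 *       size_t s = blob.size();
 *       if (s % align) {
 *           blob.resize(s + align - s % align); // zero-filled padding
 *       }
 *       u32 offset = base + verify_u32(blob.size());
 *       blob.resize(blob.size() + len);
 *       memcpy(blob.data() + blob.size() - len, src, len);
 *       return offset;
 *   }
 *
 * add_nfa_to_blob() records the returned offset in bc.engineOffsets, keyed
 * by queue index, which is how get_nfa_from_blob() finds engines again. */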
const CompileContext &cc = tbi.cc; const ReportManager &rm = tbi.rm; @@ -978,14 +1147,13 @@ aligned_unique_ptr buildOutfix(RoseBuildImpl &tbi, OutfixInfo &outfix) { buildReverseAcceleration(n.get(), outfix.rev_info, outfix.minWidth); } - outfix.nfa = n.get(); return n; } static -void prepMpv(RoseBuildImpl &tbi, vector> *built_nfas, - size_t *historyRequired, bool *mpv_as_outfix) { - assert(built_nfas->empty()); +void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, + bool *mpv_as_outfix) { + assert(bc.engineOffsets.empty()); // MPV should be first *mpv_as_outfix = false; OutfixInfo *mpv = nullptr; @@ -1004,7 +1172,6 @@ void prepMpv(RoseBuildImpl &tbi, vector> *built_nfas, } assert(mpv->chained); - assert(!mpv->nfa); auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes); assert(nfa); if (!nfa) { @@ -1016,7 +1183,6 @@ void prepMpv(RoseBuildImpl &tbi, vector> *built_nfas, } u32 qi = mpv->get_queue(tbi.qif); - assert(qi == built_nfas->size()); nfa->queueIndex = qi; DEBUG_PRINTF("built mpv\n"); @@ -1025,8 +1191,7 @@ void prepMpv(RoseBuildImpl &tbi, vector> *built_nfas, *historyRequired = 1; } - mpv->nfa = nfa.get(); - built_nfas->push_back(move(nfa)); + add_nfa_to_blob(bc, *nfa); *mpv_as_outfix = !mpv->puffettes.empty(); } @@ -1053,8 +1218,7 @@ void setOutfixProperties(NFA &n, const OutfixInfo &outfix) { } static -bool prepOutfixes(RoseBuildImpl &tbi, - vector> *built_nfas, +bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired) { if (tbi.cc.grey.onlyOneOutfix && tbi.outfixes.size() > 1) { DEBUG_PRINTF("we have %zu outfixes, but Grey::onlyOneOutfix is set\n", @@ -1062,9 +1226,7 @@ bool prepOutfixes(RoseBuildImpl &tbi, throw ResourceLimitError(); } - assert(tbi.qif.allocated_count() == built_nfas->size()); - /* assume outfixes are just above chain tails in queue indices */ - built_nfas->reserve(tbi.outfixes.size()); + assert(tbi.qif.allocated_count() == bc.engineOffsets.size()); for (auto &out : tbi.outfixes) { if (out.chained) { @@ -1080,15 +1242,13 @@ bool prepOutfixes(RoseBuildImpl &tbi, setOutfixProperties(*n, out); - u32 qi = tbi.qif.get_queue(); - assert(qi == built_nfas->size()); - n->queueIndex = qi; + n->queueIndex = out.get_queue(tbi.qif); if (!*historyRequired && requires_decompress_key(*n)) { *historyRequired = 1; } - built_nfas->push_back(move(n)); + add_nfa_to_blob(bc, *n); } return true; @@ -1139,16 +1299,24 @@ void setSuffixProperties(NFA &n, const suffix_id &suff, } static -bool buildSuffixes(const RoseBuildImpl &tbi, - vector> *built_nfas, +bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, map *suffixes, set *no_retrigger_queues) { map > suffixTriggers; findSuffixTriggers(tbi, &suffixTriggers); + // To ensure compile determinism, build suffix engines in order of their + // (unique) queue indices, so that we call add_nfa_to_blob in the same + // order. 
+ vector> ordered; for (const auto &e : *suffixes) { - const suffix_id &s = e.first; - const u32 queue = e.second; + ordered.emplace_back(e.second, e.first); + } + sort(begin(ordered), end(ordered)); + + for (const auto &e : ordered) { + const u32 queue = e.first; + const suffix_id &s = e.second; const set &s_triggers = suffixTriggers.at(s); map fixed_depth_tops; @@ -1173,69 +1341,12 @@ bool buildSuffixes(const RoseBuildImpl &tbi, no_retrigger_queues->insert(queue); } - if (built_nfas->size() <= queue) { - built_nfas->resize(queue + 1); - } - - (*built_nfas)[queue] = move(n); + add_nfa_to_blob(bc, *n); } return true; } -static -void pad_engine_blob(build_context &bc, size_t align) { - assert(ISALIGNED_N(bc.engine_blob_base, align)); - size_t s = bc.engine_blob.size(); - - if (ISALIGNED_N(s, align)) { - return; - } - - bc.engine_blob.resize(s + align - s % align); -} - -template -static -u32 add_to_engine_blob(build_context &bc, const T &a) { - static_assert(is_pod::value, "should be pod"); - pad_engine_blob(bc, alignof(T)); - - size_t rv = bc.engine_blob_base + bc.engine_blob.size(); - assert(rv >= bc.engine_blob_base); - - assert(ISALIGNED_N(bc.engine_blob.size(), alignof(T))); - - bc.engine_blob.resize(bc.engine_blob.size() + sizeof(a)); - memcpy(&bc.engine_blob.back() - sizeof(a) + 1, &a, sizeof(a)); - - return verify_u32(rv); -} - -template -static -u32 add_to_engine_blob(build_context &bc, Iter b, const Iter &e) { - using value_type = typename Iter::value_type; - static_assert(is_pod::value, "should be pod"); - pad_engine_blob(bc, alignof(value_type)); - - size_t rv = bc.engine_blob_base + bc.engine_blob.size(); - assert(rv >= bc.engine_blob_base); - - assert(ISALIGNED_N(bc.engine_blob.size(), alignof(value_type))); - - size_t total_added_length = sizeof(*b) * distance(b, e); - bc.engine_blob.resize(bc.engine_blob.size() + total_added_length); - char *p = bc.engine_blob.data() + bc.engine_blob.size() - - total_added_length; - for (; b != e; ++b, p += sizeof(*b)) { - memcpy(p, &*b, sizeof(*b)); - } - assert(p - 1 == &bc.engine_blob.back()); - - return verify_u32(rv); -} - static void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { map, u32> pre_built; @@ -1292,26 +1403,22 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { } static -bool buildNfas(RoseBuildImpl &tbi, QueueIndexFactory &qif, - vector> *built_nfas, +bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, map *suffixes, - map *leftfix_info, set *no_retrigger_queues, u32 *leftfixBeginQueue) { findSuffixes(tbi, qif, suffixes); - if (!buildSuffixes(tbi, built_nfas, suffixes, no_retrigger_queues)) { + if (!buildSuffixes(tbi, bc, suffixes, no_retrigger_queues)) { return false; } *leftfixBeginQueue = qif.allocated_count(); - if (!buildLeftfixes(tbi, qif, built_nfas, no_retrigger_queues, leftfix_info, - true)) { + if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, true)) { return false; } - if (!buildLeftfixes(tbi, qif, built_nfas, no_retrigger_queues, leftfix_info, - false)) { + if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, false)) { return false; } @@ -1364,20 +1471,20 @@ void findTransientQueues(const map &leftfix_info, } static -void updateNfaState(const vector> &built_nfas, - const map &leftfix_info, - RoseStateOffsets *so, NfaInfo *nfa_infos, - u32 *fullStateSize, u32 *nfaStateSize, u32 *tStateSize) { +void updateNfaState(const build_context &bc, RoseStateOffsets *so, + NfaInfo *nfa_infos, u32 *fullStateSize, u32 *nfaStateSize, + u32 *tStateSize) { 
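/* State allocation now walks bc.engineOffsets rather than a separate
 * vector of built NFAs: each engine written into the blob is recovered by
 * queue index via get_nfa_from_blob() and handed to allocateStateSpace().
 * Transient queues (per findTransientQueues()) are tracked separately,
 * since their state is expected to live in scratch rather than in the
 * persistent stream state. */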
*nfaStateSize = 0; *tStateSize = 0; *fullStateSize = 0; set transient_queues; - findTransientQueues(leftfix_info, &transient_queues); + findTransientQueues(bc.leftfix_info, &transient_queues); - for (const auto &n : built_nfas) { - allocateStateSpace(n.get(), transient_queues, so, nfa_infos, - fullStateSize, nfaStateSize, tStateSize); + for (const auto &m : bc.engineOffsets) { + const NFA *n = get_nfa_from_blob(bc, m.first); + allocateStateSpace(n, transient_queues, so, nfa_infos, fullStateSize, + nfaStateSize, tStateSize); } } @@ -2121,9 +2228,8 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { for (auto v : vertices_range(g)) { if (hasLastByteHistoryOutEdge(g, v)) { - u32 role = g[v].role; - assert(role < bc.roleTable.size()); - lb_roles.push_back(bc.roleTable[role].stateIndex); + assert(contains(bc.roleStateIndices, v)); + lb_roles.push_back(bc.roleStateIndices.at(v)); } } @@ -2154,16 +2260,6 @@ const char *describeHistory(RoseRoleHistory history) { } #endif -static -u32 calcNfaSize(const vector> &nfas) { - size_t nfas_size = 0; - - for (const auto &n : nfas) { - nfas_size += ROUNDUP_CL(n->length); - } - return verify_u32(nfas_size); -} - static void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &grey) { // Global limit. @@ -2188,81 +2284,6 @@ void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &gre } } -/* copies nfas into the final engine and updates role to reflect nfa offset */ -static -u32 copyInNFAs(const RoseBuildImpl &tbi, vector *roleTable, - const vector> &built_nfas, - const set &no_retrigger_queues, NfaInfo *infos, - u32 base_nfa_offset, - const map &suffixes, char *ptr) { - const RoseGraph &g = tbi.g; - const CompileContext &cc = tbi.cc; - - // Enforce engine count resource limit. - if (built_nfas.size() > cc.grey.limitRoseEngineCount) { - throw ResourceLimitError(); - } - - vector suffix_base(built_nfas.size()); - vector classic_top(built_nfas.size(), false); - - for (u32 i = 0; i < built_nfas.size(); i++) { - const NFA *n = built_nfas[i].get(); - - // Enforce individual engine size limit. - enforceEngineSizeLimit(n, n->length, cc.grey); - - DEBUG_PRINTF("copying in nfa %u: len=%u, offset=%u\n", i, n->length, - base_nfa_offset); - - memcpy(ptr + base_nfa_offset, n, n->length); - suffix_base[i] = base_nfa_offset; - - if (!isMultiTopType(n->type)) { - classic_top[i] = true; - } - - infos[i].nfaOffset = base_nfa_offset; - if (contains(no_retrigger_queues, i)) { - infos[i].no_retrigger = 1; - } - base_nfa_offset += ROUNDUP_CL(n->length); - } - - /* Write NFA indices into RoseRole structures for suffix NFAs */ - for (auto v : vertices_range(g)) { - if (!g[v].suffix) { - continue; - } - - u32 nfa_index = suffixes.at(g[v].suffix); - assert(nfa_index < suffix_base.size()); - - assert(g[v].role < roleTable->size()); - RoseRole &tr = (*roleTable)[g[v].role]; - tr.suffixOffset = suffix_base[nfa_index]; - - // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP - // event. 
- if (classic_top[nfa_index]) { - assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); - tr.suffixEvent = MQE_TOP; - } else { - assert(!g[v].suffix.haig); - u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top; - assert(top < MQE_INVALID); - tr.suffixEvent = top; - } - - /* mark suffixes triggered by etable literals */ - if (tbi.isInETable(v)) { - infos[nfa_index].eod = 1; - } - } - - return base_nfa_offset; -} - static u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) { const RoseGraph &g = tbi.g; @@ -2284,82 +2305,6 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) { return minWidth; } -static -vector buildRoseTriggerList(const RoseGraph &g, RoseVertex u, - const map &leftfix_info) { - // Terminator struct that marks the end of each role's trigger list. - RoseTrigger terminator; - memset(&terminator, 0, sizeof(RoseTrigger)); - terminator.queue = MO_INVALID_IDX; - terminator.event = MQE_INVALID; - terminator.cancel_prev_top = false; - - vector rv; - - for (const auto &e : out_edges_range(u, g)) { - RoseVertex v = target(e, g); - if (!g[v].left) { - continue; - } - - assert(contains(leftfix_info, v)); - const left_build_info &rbi = leftfix_info.at(v); - if (rbi.has_lookaround) { - continue; - } - assert(rbi.nfa); - - // DFAs have no TOP_N support, so they get a classic MQE_TOP event. - u32 top; - if (!isMultiTopType(rbi.nfa->type)) { - assert(num_tops(g[v].left) == 1); - top = MQE_TOP; - } else { - top = MQE_TOP_FIRST + g[e].rose_top; - assert(top < MQE_INVALID); - } - - rv.push_back(terminator); - RoseTrigger &trigger = rv.back(); - trigger.queue = rbi.nfa->queueIndex; - trigger.event = top; - trigger.cancel_prev_top = g[e].rose_cancel_prev_top; - } - - if (rv.empty()) { - return rv; - } - - sort(rv.begin(), rv.end(), RoseTriggerOrdering()); - rv.erase(unique(rv.begin(), rv.end(), RoseTriggerEquality()), rv.end()); - - rv.push_back(terminator); - - return rv; -} - -static -void buildRoseTriggerLists(const RoseBuildImpl &tbi, build_context &bc) { - const RoseGraph &g = tbi.g; - for (auto u : vertices_range(g)) { - if (tbi.isAnyStart(u) || g[u].literals.empty() - || tbi.hasDirectFinalId(u)) { - continue; - } - - assert(g[u].role < bc.roleTable.size()); - RoseRole &tr = bc.roleTable.at(g[u].role); - - vector trigs = buildRoseTriggerList(g, u, bc.leftfix_info); - - if (!trigs.empty()) { - assert(trigs.size() != 1); /* at min should be trig + term */ - tr.infixTriggerOffset = add_to_engine_blob(bc, trigs.begin(), - trigs.end()); - } - } -} - static void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, const QueueIndexFactory &qif, @@ -2381,8 +2326,7 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, /* for each outfix also build elists */ for (const auto &outfix : tbi.outfixes) { - assert(outfix.nfa); - u32 qi = outfix.nfa->queueIndex; + u32 qi = outfix.get_queue(); set ekeys = reportsToEkeys(all_reports(outfix), tbi.rm); if (!ekeys.empty()) { @@ -2431,7 +2375,6 @@ bool anyEndfixMpvTriggers(const RoseBuildImpl &tbi) { /* outfixes */ for (const auto &out : tbi.outfixes) { - assert(out.nfa); if (hasMpvTrigger(all_reports(out), tbi.rm)) { return true; } @@ -2451,31 +2394,53 @@ bool hasInternalReport(const set &reports, const ReportManager &rm) { } static -void populateNfaInfoBasics(NfaInfo *infos, const vector &outfixes, - const ReportManager &rm, +void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, + const vector &outfixes, const map &suffixes, - const vector &ekeyListOffsets) { + const vector 
&ekeyListOffsets, + const set &no_retrigger_queues, + NfaInfo *infos) { + const u32 num_queues = build.qif.allocated_count(); + for (u32 qi = 0; qi < num_queues; qi++) { + const NFA *n = get_nfa_from_blob(bc, qi); + enforceEngineSizeLimit(n, n->length, build.cc.grey); + + NfaInfo &info = infos[qi]; + info.nfaOffset = bc.engineOffsets.at(qi); + info.ekeyListOffset = ekeyListOffsets[qi]; + info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0; + } + + // Mark outfixes that only trigger external reports. for (const auto &out : outfixes) { - assert(out.nfa); - const u32 qi = out.nfa->queueIndex; + const u32 qi = out.get_queue(); infos[qi].in_sbmatcher = out.in_sbmatcher; - if (!hasInternalReport(all_reports(out), rm)) { + if (!hasInternalReport(all_reports(out), build.rm)) { infos[qi].only_external = 1; } - - infos[qi].ekeyListOffset = ekeyListOffsets[qi]; } + // Mark suffixes that only trigger external reports. for (const auto &e : suffixes) { const suffix_id &s = e.first; u32 qi = e.second; - if (!hasInternalReport(all_reports(s), rm)) { + if (!hasInternalReport(all_reports(s), build.rm)) { infos[qi].only_external = 1; } + } - infos[qi].ekeyListOffset = ekeyListOffsets[qi]; + // Mark suffixes triggered by EOD table literals. + const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (!g[v].suffix) { + continue; + } + u32 qi = suffixes.at(g[v].suffix); + if (build.isInETable(v)) { + infos[qi].eod = 1; + } } } @@ -2652,51 +2617,117 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { return out; } +/** + * \brief Flattens a list of role programs into one finalised program with its + * fail_jump/done_jump targets set correctly. + */ +static +vector +flattenRoleProgram(const vector> &program) { + vector out; + + vector offsets; // offset of each instruction (bytes) + vector targets; // jump target for each instruction + + size_t curr_offset = 0; + for (const auto &prog : program) { + for (const auto &ri : prog) { + out.push_back(ri); + offsets.push_back(curr_offset); + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + for (size_t i = 0; i < prog.size(); i++) { + targets.push_back(curr_offset); + } + } + + // Add an END instruction. 
+ out.emplace_back(ROSE_ROLE_INSTR_END); + offsets.push_back(curr_offset); + targets.push_back(curr_offset); + + for (size_t i = 0; i < out.size(); i++) { + auto &ri = out[i]; + switch (ri.code()) { + case ROSE_ROLE_INSTR_ANCHORED_DELAY: + ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; + break; + case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: + ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; + break; + case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: + ri.u.checkRootBounds.fail_jump = targets[i] - offsets[i]; + break; + case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: + ri.u.checkLookaround.fail_jump = targets[i] - offsets[i]; + break; + case ROSE_ROLE_INSTR_CHECK_LEFTFIX: + ri.u.checkLeftfix.fail_jump = targets[i] - offsets[i]; + break; + default: + break; + } + } + + return out; +} + static -void buildRootRoleTable(const RoseBuildImpl &tbi, u32 roleTableOffset, - vector &literalTable, - vector *rootRoleTable) { +u32 writeRoleProgram(build_context &bc, vector &program) { + DEBUG_PRINTF("writing %zu instructions\n", program.size()); + u32 programOffset = 0; + for (const auto &ri : program) { + u32 offset = + add_to_engine_blob(bc, ri.get(), ri.length(), ROSE_INSTR_MIN_ALIGN); + DEBUG_PRINTF("code %u len %zu written at offset %u\n", ri.code(), + ri.length(), offset); + if (!programOffset) { + programOffset = offset; + } + } + return programOffset; +} + +static +void buildRootRolePrograms(const RoseBuildImpl &build, build_context &bc, + vector &literalTable) { for (u32 id = 0; id < literalTable.size(); id++) { - RoseLiteral &tl = literalTable[id]; - const rose_literal_info &lit_info = - **getLiteralInfoByFinalId(tbi, id).begin(); - const auto &vertices = lit_info.vertices; - - tl.rootRoleOffset = verify_u32(rootRoleTable->size()); - tl.rootRoleCount = 0; - - for (RoseVertex v : vertices) { - if (tbi.isRootSuccessor(v)) { - if (tbi.hasDirectFinalId(v)) { - DEBUG_PRINTF("[skip root role %u as direct]\n", - tbi.g[v].role); - continue; - } - assert(tbi.isRootSuccessor(v)); - u32 role_offset - = roleTableOffset + tbi.g[v].role * sizeof(RoseRole); - rootRoleTable->push_back(role_offset); - tl.rootRoleCount++; - DEBUG_PRINTF("root role %u\n", tbi.g[v].role); + DEBUG_PRINTF("lit %u\n", id); + const auto &lit_info = **getLiteralInfoByFinalId(build, id).begin(); + + flat_set root_roles; // with programs to run. 
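/* Layout that flattenRoleProgram() produces for the multi-role case
 * (hypothetical example): a literal with root roles {5, 9} whose programs
 * are P5 and P9 is flattened to
 *
 *   [P5 instructions][P9 instructions][END]
 *
 * with the fail_jump/done_jump of each P5 check set to the relative
 * distance to the first P9 instruction, and P9's checks jumping to END.
 * Failing a precondition in one role therefore falls through to the next
 * role's program instead of abandoning the whole literal. */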
+ + for (RoseVertex v : lit_info.vertices) { + if (!build.isRootSuccessor(v)) { + continue; } + if (build.hasDirectFinalId(v)) { + DEBUG_PRINTF("[skip root role %u as direct]\n", + build.g[v].role); + continue; + } + DEBUG_PRINTF("root role %u\n", build.g[v].role); + root_roles.insert(build.g[v].role); } - if (!tl.rootRoleCount) { - tl.rootRoleOffset = 0; - } else if (tl.rootRoleCount > 1) { - // Sort the entries for this literal by role index - vector::iterator begin = rootRoleTable->begin() - + tl.rootRoleOffset; - vector::iterator end = begin + tl.rootRoleCount; - sort(begin, end); - } else if (tl.rootRoleCount == 1) { - /* if there is only one root role, the rose literal stores the - * offset directly */ - tl.rootRoleOffset = (*rootRoleTable)[tl.rootRoleOffset]; + vector> root_prog; + for (const auto &role : root_roles) { + assert(role < bc.rolePrograms.size()); + const auto &role_prog = bc.rolePrograms[role]; + if (role_prog.empty()) { + continue; + } + root_prog.push_back(role_prog); } - DEBUG_PRINTF("literal %u: %u root roles, starting from idx=%u\n", id, - tl.rootRoleCount, tl.rootRoleOffset); + RoseLiteral &tl = literalTable[id]; + if (root_prog.empty()) { + tl.rootProgramOffset = 0; + continue; + } + + auto final_program = flattenRoleProgram(root_prog); + tl.rootProgramOffset = writeRoleProgram(bc, final_program); } } @@ -2722,12 +2753,10 @@ void buildActiveLeftIter(const vector &leftTable, } static -bool hasEodAnchors(const RoseBuildImpl &tbi, - const vector> &built_nfas, +bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc, u32 outfixEndQueue) { - assert(outfixEndQueue <= built_nfas.size()); for (u32 i = 0; i < outfixEndQueue; i++) { - if (nfaAcceptsEod(built_nfas[i].get())) { + if (nfaAcceptsEod(get_nfa_from_blob(bc, i))) { DEBUG_PRINTF("outfix has eod\n"); return true; } @@ -2880,42 +2909,27 @@ pair buildEodAnchorRoles(RoseBuildImpl &tbi, build_context &bc, bc.roleTable.push_back(RoseRole()); RoseRole &tr = bc.roleTable.back(); memset(&tr, 0, sizeof(tr)); - tr.stateIndex = MMB_INVALID; - tr.predOffset = ROSE_OFFSET_INVALID; - tr.reportId = er.first; - tr.flags = ROSE_ROLE_FLAG_ACCEPT_EOD; + + bc.rolePrograms.push_back({}); + auto &program = bc.rolePrograms.back(); + auto ri = RoleInstruction(ROSE_ROLE_INSTR_REPORT_EOD); + ri.u.report.report = er.first; + program.push_back(ri); // Collect the state IDs of this report's vertices to add to the EOD // sparse iterator, creating pred entries appropriately. for (const auto &e : er.second) { RoseVertex v = source(e, g); DEBUG_PRINTF("vertex %zu has role %u\n", g[v].idx, g[v].role); - assert(g[v].role < bc.roleTable.size()); - RoseRole &predRole = bc.roleTable[g[v].role]; + assert(contains(bc.roleStateIndices, v)); + u32 predStateIdx = bc.roleStateIndices.at(v); createPred(tbi, bc, e, predTable); - const RosePred &tp = predTable.back(); - RoseIterRole ir = { (u32)(bc.roleTable.size() - 1), (u32)(predTable.size() - 1) }; - predStates[predRole.stateIndex].push_back(ir); - - if (out_degree(v, g) == 1 && tp.minBound == 0 && tp.maxBound == 0) { - // Since it leads ONLY to an EOD accept with bounds (0, 0), we - // can tag this role with the "must match at end of block" - // flag. 
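/* A hypothetical sketch of how a consumer steps through a program written
 * by writeRoleProgram(); the dump code later in this patch walks programs
 * the same way. check_leftfix() and instr_length() are stand-ins, not
 * functions from this patch:
 *
 *   const char *pc = (const char *)t + programOffset;
 *   for (;;) {
 *       u8 code = *(const u8 *)pc;
 *       if (code == ROSE_ROLE_INSTR_END) {
 *           break;
 *       }
 *       if (code == ROSE_ROLE_INSTR_CHECK_LEFTFIX) {
 *           const auto *ri = (const ROSE_ROLE_STRUCT_CHECK_LEFTFIX *)pc;
 *           if (!check_leftfix(ri->queue, ri->lag, ri->report)) {
 *               pc += ri->fail_jump; // relative, from this instruction
 *               continue;
 *           }
 *       }
 *       // ... handle other opcodes, then advance to the next one:
 *       pc += ROUNDUP_N(instr_length(code), ROSE_INSTR_MIN_ALIGN);
 *   }
 */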
- DEBUG_PRINTF("flagging role %u as ONLY_AT_END\n", g[v].role); - - /* There is no pointing enforcing this check at runtime if - * the predRole is only fired by eod event literal */ - if (g[v].literals.size() != 1 - || *g[v].literals.begin() != tbi.eod_event_literal_id) { - predRole.flags |= ROSE_ROLE_FLAG_ONLY_AT_END; - } - } - predRole.flags |= ROSE_ROLE_FLAG_PRED_OF_EOD; + predStates[predStateIdx].push_back(ir); } } @@ -2976,10 +2990,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, RoseLiteral &tl = literalTable.back(); memset(&tl, 0, sizeof(tl)); - // These two are set by buildRootRoleTable. - tl.rootRoleOffset = 0; - tl.rootRoleCount = 0; - tl.groups = 0; for (const auto &li : lit_infos) { tl.groups |= li->group_mask; @@ -3036,146 +3046,379 @@ void buildLiteralTable(const RoseBuildImpl &tbi, build_context &bc, } } +/** + * \brief True if the given vertex is a role that can only be switched on at + * EOD. + */ static -void createRoleEntry(RoseBuildImpl &tbi, build_context &bc, - RoseVertex v, vector &roleTable, - ue2::unordered_map, size_t> &lookaround_cache, - u32 *nextStateIndex) { - RoseGraph &g = tbi.g; +bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { + const RoseGraph &g = tbi.g; - // Vertices have been normalised by now to have <= 1 reports. - assert(g[v].reports.size() <= 1); + // All such roles have only (0,0) edges to vertices with the eod_accept + // property, and no other effects (suffixes, ordinary reports, etc, etc). - // set role ID in the graph where we can find it later - u32 roleId = (u32)roleTable.size(); - g[v].role = roleId; - // track id if it's a nonroot role for use in buildSparseIter - if (!tbi.isRootSuccessor(v)) { - for (const auto &lit_id : g[v].literals) { - u32 final_id = tbi.literal_info.at(lit_id).final_id; - bc.litNonRootRoles[final_id].insert(roleId); + if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) { + return false; + } + + for (const auto &e : out_edges_range(v, g)) { + RoseVertex w = target(e, g); + if (!g[w].eod_accept) { + return false; + } + assert(!g[w].reports.empty()); + assert(g[w].literals.empty()); + + if (g[e].minBound || g[e].maxBound) { + return false; } } - roleTable.push_back(RoseRole()); - RoseRole &tr = roleTable.back(); - memset(&tr, 0, sizeof(tr)); + /* There is no pointing enforcing this check at runtime if + * this role is only fired by the eod event literal */ + if (tbi.eod_event_literal_id != MO_INVALID_IDX && + g[v].literals.size() == 1 && + *g[v].literals.begin() == tbi.eod_event_literal_id) { + return false; + } - DEBUG_PRINTF("creating role %u for i%zu, eod %u, s (%p,%p)\n", roleId, - g[v].idx, (u32)g[v].eod_accept, g[v].suffix.graph.get(), - g[v].suffix.haig.get()); + return true; +} - // accept roles get their report ID. 
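/* onlyAtEod() above replaces the old ONLY_AT_END role flag: when a role's
 * only effect is to enable (0,0)-bound edges into eod_accept vertices,
 * createRoleEntry() opens its program with a CHECK_ONLY_EOD instruction so
 * the whole program is skipped unless we are at the end of the buffer. The
 * eod event literal case is excluded because such roles only fire at EOD
 * anyway, making the check redundant. */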
- if (!g[v].reports.empty()) { - DEBUG_PRINTF("%zu reports\n", g[v].reports.size()); - assert(g[v].reports.size() == 1); - tr.reportId = *g[v].reports.begin(); - assert(tr.reportId < tbi.rm.numReports()); - const Report &ir = tbi.rm.getReport(tr.reportId); - if (isInternalSomReport(ir)) { - tr.flags |= ROSE_ROLE_FLAG_SOM_REPORT; - } - if (ir.type == INTERNAL_ROSE_CHAIN) { - tr.flags |= ROSE_ROLE_FLAG_CHAIN_REPORT; - } - } else { - tr.reportId = MO_INVALID_IDX; +static +void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, + vector &program, + ue2::unordered_map, size_t> &lookaround_cache) { + if (!build.cc.grey.roseLookaroundMasks) { + return; } - tr.leftfixReport = g[v].left.leftfix_report; - assert(!tbi.cc.streaming || g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - tr.leftfixLag = g[v].left.lag; - tr.depth = (u8)min(254U, bc.depths.at(v)); - tr.groups = g[v].groups; - tr.flags |= ROSE_ROLE_PRED_NONE; + vector look; - if (contains(bc.leftfix_info, v)) { - const left_build_info &lni = bc.leftfix_info.at(v); - if (!lni.has_lookaround) { - tr.flags |= ROSE_ROLE_FLAG_ROSE; - tr.leftfixQueue = lni.nfa->queueIndex; - } + // Lookaround from leftfix (mandatory). + if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) { + DEBUG_PRINTF("using leftfix lookaround\n"); + look = bc.leftfix_info.at(v).lookaround; } - if (!g[v].literals.empty()) { - /* all literals for a role come from the same table -> inspect any */ - switch (tbi.literals.right.at(*g[v].literals.begin()).table) { - case ROSE_ANCHORED: - tr.flags |= ROSE_ROLE_FLAG_ANCHOR_TABLE; - break; - case ROSE_EOD_ANCHORED: - tr.flags |= ROSE_ROLE_FLAG_EOD_TABLE; - break; - default: - ; - } + // We may be able to find more lookaround info (advisory) and merge it + // in. + vector look_more; + findLookaroundMasks(build, v, look_more); + mergeLookaround(look, look_more); + + if (look.empty()) { + return; } - // Leaf nodes don't need state indices, as they don't have successors. - /* TODO: also don't need a state index if all edges are nfa based */ - if (isLeafNode(v, g)) { - tr.stateIndex = MMB_INVALID; + DEBUG_PRINTF("role has lookaround\n"); + u32 look_idx; + auto it = lookaround_cache.find(look); + if (it != lookaround_cache.end()) { + DEBUG_PRINTF("reusing look at idx %zu\n", it->second); + look_idx = verify_u32(it->second); } else { - tr.stateIndex = (*nextStateIndex)++; + size_t idx = bc.lookaround.size(); + lookaround_cache.emplace(look, idx); + insert(&bc.lookaround, bc.lookaround.end(), look); + DEBUG_PRINTF("adding look at idx %zu\n", idx); + look_idx = verify_u32(idx); + } + u32 look_count = verify_u32(look.size()); + + auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_LOOKAROUND); + ri.u.checkLookaround.index = look_idx; + ri.u.checkLookaround.count = look_count; + program.push_back(ri); +} + +static +void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, + vector &program) { + auto it = bc.leftfix_info.find(v); + if (it == end(bc.leftfix_info)) { + return; } + const left_build_info &lni = it->second; + if (lni.has_lookaround) { + return; // Leftfix completely implemented by lookaround. 
+ } + + assert(!build.cc.streaming || + build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); + + auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_LEFTFIX); + ri.u.checkLeftfix.queue = lni.queue; + ri.u.checkLeftfix.lag = build.g[v].left.lag; + ri.u.checkLeftfix.report = build.g[v].left.leftfix_report; + program.push_back(ri); +} + +static +void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, + RoseVertex v, vector &program) { + // Only relevant for roles that can be triggered by the anchored table. + if (!build.isAnchored(v)) { + return; + } + + // TODO: also limit to matches that can occur after + // floatingMinLiteralMatchOffset. + + auto ri = RoleInstruction(ROSE_ROLE_INSTR_ANCHORED_DELAY); + ri.u.anchoredDelay.depth = (u8)min(254U, bc.depths.at(v)); + ri.u.anchoredDelay.groups = build.g[v].groups; + program.push_back(ri); +} + +static +void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, + vector &program) { + const auto &g = build.g; /* we are a suffaig - need to update role to provide som to the * suffix. */ bool has_som = false; if (g[v].left.tracksSom()) { - tr.flags |= ROSE_ROLE_FLAG_SOM_ROSEFIX; + assert(contains(bc.leftfix_info, v)); + const left_build_info &lni = bc.leftfix_info.at(v); + auto ri = RoleInstruction(ROSE_ROLE_INSTR_SOM_LEFTFIX); + ri.u.somLeftfix.queue = lni.queue; + ri.u.somLeftfix.lag = g[v].left.lag; + program.push_back(ri); has_som = true; } else if (g[v].som_adjust) { - tr.somAdjust = g[v].som_adjust; - tr.flags |= ROSE_ROLE_FLAG_SOM_ADJUST; + auto ri = RoleInstruction(ROSE_ROLE_INSTR_SOM_ADJUST); + ri.u.somAdjust.distance = g[v].som_adjust; + program.push_back(ri); has_som = true; } - if (has_som && !g[v].reports.empty()) { - tr.flags |= ROSE_ROLE_FLAG_REPORT_START; + // Write program instructions for reports. + for (ReportID id : g[v].reports) { + assert(id < build.rm.numReports()); + const Report &ir = build.rm.getReport(id); + if (isInternalSomReport(ir)) { + auto ri = + RoleInstruction(has_som ? ROSE_ROLE_INSTR_REPORT_SOM + : ROSE_ROLE_INSTR_REPORT_SOM_INT); + ri.u.report.report = id; + program.push_back(ri); + } else if (ir.type == INTERNAL_ROSE_CHAIN) { + auto ri = RoleInstruction(ROSE_ROLE_INSTR_REPORT_CHAIN); + ri.u.report.report = id; + program.push_back(ri); + } else { + auto ri = + RoleInstruction(has_som ? ROSE_ROLE_INSTR_REPORT_SOM_KNOWN + : ROSE_ROLE_INSTR_REPORT); + ri.u.report.report = id; + program.push_back(ri); + } } +} - vector look; - if (tbi.cc.grey.roseLookaroundMasks) { - // Lookaround from leftfix (mandatory). - if (contains(bc.leftfix_info, v) && - bc.leftfix_info.at(v).has_lookaround) { - DEBUG_PRINTF("using leftfix lookaround\n"); - look = bc.leftfix_info.at(v).lookaround; - } - // We may be able to find more lookaround info (advisory) and merge it - // in. 
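/* Opcode selection in makeRoleReports() above, summarised from its branch
 * structure (has_som is set once a SOM_LEFTFIX or SOM_ADJUST instruction
 * has established the start of match):
 *
 *   internal SOM report:  REPORT_SOM if has_som, else REPORT_SOM_INT
 *   INTERNAL_ROSE_CHAIN:  REPORT_CHAIN
 *   anything else:        REPORT_SOM_KNOWN if has_som, else REPORT
 */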
- vector look_more; - findLookaroundMasks(tbi, v, look_more); - mergeLookaround(look, look_more); +static +void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, + const map &suffixes, + vector &program) { + const auto &g = build.g; + if (!g[v].suffix) { + return; } - if (look.empty()) { - DEBUG_PRINTF("no lookaround\n"); - tr.lookaroundIndex = MO_INVALID_IDX; - tr.lookaroundCount = 0; + assert(contains(suffixes, g[v].suffix)); + u32 qi = suffixes.at(g[v].suffix); + assert(contains(bc.engineOffsets, qi)); + const NFA *nfa = get_nfa_from_blob(bc, qi); + u32 suffixEvent; + if (isMultiTopType(nfa->type)) { + assert(!g[v].suffix.haig); + u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top; + assert(top < MQE_INVALID); + suffixEvent = top; } else { - auto it = lookaround_cache.find(look); - if (it != lookaround_cache.end()) { - DEBUG_PRINTF("reusing look at idx %zu\n", it->second); - tr.lookaroundIndex = verify_u32(it->second); + // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP + // event. + assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); + suffixEvent = MQE_TOP; + } + auto ri = RoleInstruction(ROSE_ROLE_INSTR_TRIGGER_SUFFIX); + ri.u.triggerSuffix.queue = qi; + ri.u.triggerSuffix.event = suffixEvent; + program.push_back(ri); +} + +static +void makeRoleGroups(const rose_group &groups, + vector &program) { + if (!groups) { + return; + } + auto ri = RoleInstruction(ROSE_ROLE_INSTR_SET_GROUPS); + ri.u.setGroups.groups = groups; + program.push_back(ri); +} + +static +void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, + RoseVertex u, vector &program) { + const auto &g = build.g; + + vector infix_program; + + for (const auto &e : out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (!g[v].left) { + continue; + } + + assert(contains(bc.leftfix_info, v)); + const left_build_info &lbi = bc.leftfix_info.at(v); + if (lbi.has_lookaround) { + continue; + } + + const NFA *nfa = get_nfa_from_blob(bc, lbi.queue); + + // DFAs have no TOP_N support, so they get a classic MQE_TOP event. + u32 top; + if (!isMultiTopType(nfa->type)) { + assert(num_tops(g[v].left) == 1); + top = MQE_TOP; } else { - size_t idx = bc.lookaround.size(); - lookaround_cache.insert(make_pair(look, idx)); - insert(&bc.lookaround, bc.lookaround.end(), look); - DEBUG_PRINTF("adding look at idx %zu\n", idx); - tr.lookaroundIndex = verify_u32(idx); + top = MQE_TOP_FIRST + g[e].rose_top; + assert(top < MQE_INVALID); } - tr.lookaroundCount = verify_u32(look.size()); + + auto ri = RoleInstruction(ROSE_ROLE_INSTR_TRIGGER_INFIX); + ri.u.triggerInfix.queue = lbi.queue; + ri.u.triggerInfix.event = top; + ri.u.triggerInfix.cancel = g[e].rose_cancel_prev_top; + infix_program.push_back(ri); + } + + if (infix_program.empty()) { + return; + } + + // Order, de-dupe and add instructions to the end of program. + sort(begin(infix_program), end(infix_program)); + unique_copy(begin(infix_program), end(infix_program), + back_inserter(program)); + + // Groups may be cleared by an infix going quiet. Set groups immediately + // after infixes are triggered. + makeRoleGroups(g[u].groups, program); +} + +static +void makeRoleSetState(RoseBuildImpl &build, build_context &bc, RoseVertex v, + vector &program, + u32 *nextStateIndex) { + const auto &g = build.g; + + // Leaf nodes don't need state indices, as they don't have successors. 
+ if (isLeafNode(v, g)) { + return; + } + + /* TODO: also don't need a state index if all edges are nfa based */ + + u32 idx = (*nextStateIndex)++; + auto ri = RoleInstruction(ROSE_ROLE_INSTR_SET_STATE); + ri.u.setState.index = idx; + ri.u.setState.depth = (u8)min(254U, bc.depths.at(v)); + program.push_back(ri); + bc.roleStateIndices.emplace(v, idx); +} + +static +void createRoleEntry(RoseBuildImpl &tbi, build_context &bc, + RoseVertex v, vector &roleTable, + ue2::unordered_map, size_t> &lookaround_cache, + const map &suffixes, u32 *nextStateIndex) { + RoseGraph &g = tbi.g; + + // set role ID in the graph where we can find it later + u32 roleId = verify_u32(roleTable.size()); + g[v].role = roleId; + // track id if it's a nonroot role for use in buildSparseIter + if (!tbi.isRootSuccessor(v)) { + for (const auto &lit_id : g[v].literals) { + u32 final_id = tbi.literal_info.at(lit_id).final_id; + bc.litNonRootRoles[final_id].insert(roleId); + } + } + + roleTable.push_back(RoseRole()); + RoseRole &tr = roleTable.back(); + memset(&tr, 0, sizeof(tr)); + + DEBUG_PRINTF("creating role %u for i%zu, eod %u, s (%p,%p)\n", roleId, + g[v].idx, (u32)g[v].eod_accept, g[v].suffix.graph.get(), + g[v].suffix.haig.get()); + + // Build role program. + + assert(bc.rolePrograms.size() == roleId); + bc.rolePrograms.push_back({}); + vector &program = bc.rolePrograms.back(); + + // First, add program instructions that enforce preconditions without + // effects. + + makeRoleAnchoredDelay(tbi, bc, v, program); + + if (onlyAtEod(tbi, v)) { + DEBUG_PRINTF("only at eod\n"); + program.push_back(RoleInstruction(ROSE_ROLE_INSTR_CHECK_ONLY_EOD)); } - DEBUG_PRINTF("role id=%u, stateidx=%u, reportId=%u, " - "depth=%u, groups=0x%016llx\n", roleId, tr.stateIndex, - tr.reportId, tr.depth, tr.groups); + makeRoleLookaround(tbi, bc, v, program, lookaround_cache); + makeRoleCheckLeftfix(tbi, bc, v, program); + + // Next, we can add program instructions that have effects. + + makeRoleReports(tbi, bc, v, program); + makeRoleInfixTriggers(tbi, bc, v, program); + makeRoleSuffix(tbi, bc, v, suffixes, program); + makeRoleSetState(tbi, bc, v, program, nextStateIndex); + makeRoleGroups(g[v].groups, program); +} + +static +void writeRolePrograms(build_context &bc) { + assert(bc.roleTable.size() == bc.rolePrograms.size()); + + for (size_t i = 0; i < bc.roleTable.size(); i++) { + auto &role = bc.roleTable[i]; + auto &program = bc.rolePrograms[i]; + + if (program.empty()) { + role.programOffset = 0; + continue; + } + + // Safety check: all precondition checks should occur before + // instructions with effects. + assert(is_partitioned( + begin(program), end(program), [](const RoleInstruction &ri) { + // CHECK_LEFTFIX is the last precondition check. + return ri.code() <= ROSE_ROLE_INSTR_CHECK_LEFTFIX; + })); + + // Apply jump fixups. + auto final_program = flattenRoleProgram({program}); + + // Write into bytecode. + role.programOffset = writeRoleProgram(bc, final_program); + } } // Construct an initial role table containing the basic role information. 
static -void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc) { +void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc, + const map &suffixes) { DEBUG_PRINTF("building role table\n"); const RoseGraph &g = tbi.g; @@ -3210,7 +3453,8 @@ void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc) { } assert(!g[v].literals.empty()); - createRoleEntry(tbi, bc, v, roleTable, lookaround_cache, &stateIndex); + createRoleEntry(tbi, bc, v, roleTable, lookaround_cache, suffixes, + &stateIndex); } bc.numStates = stateIndex; @@ -3218,6 +3462,48 @@ void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc) { stateIndex); } +static +void makeRoleCheckRootBounds(const RoseBuildImpl &build, RoseVertex v, + const RoseEdge &e, + vector &program) { + const RoseGraph &g = build.g; + const RoseVertex u = source(e, g); + + assert(u == build.root || u == build.anchored_root); + + // Use the minimum literal length. + u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); + + u32 min_bound = g[e].minBound + lit_length; + u32 max_bound = g[e].maxBound == ROSE_BOUND_INF + ? ROSE_BOUND_INF + : g[e].maxBound + lit_length; + + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + assert(g[u].max_offset != ROSE_BOUND_INF); + // Make offsets absolute. + min_bound += g[u].max_offset; + if (max_bound != ROSE_BOUND_INF) { + max_bound += g[u].max_offset; + } + } + + assert(max_bound <= ROSE_BOUND_INF); + assert(min_bound <= max_bound); + + auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS); + ri.u.checkRootBounds.min_bound = min_bound; + ri.u.checkRootBounds.max_bound = max_bound; + + // This precondition instruction should go near the start of + // the program, after the ONLY_EOD check if it's present. + auto it = + find_if(begin(program), end(program), [](const RoleInstruction &ri) { + return ri.code() > ROSE_ROLE_INSTR_CHECK_ONLY_EOD; + }); + program.insert(it, ri); +} + // Construct pred table and sparse iterators over preds. static void buildPredTable(const RoseBuildImpl &tbi, build_context &bc, @@ -3249,16 +3535,17 @@ void buildPredTable(const RoseBuildImpl &tbi, build_context &bc, "[%u, %u]\n", g[u].role, g[v].role, g[e].minBound, g[e].maxBound); if (tbi.isAnyStart(u)) { - /* we have ourselves a root role */ + // Solely root roles can be handled with no check at all (for + // very simple cases), or a bounds check in the role program. assert(u != tbi.root || g[e].maxBound == ROSE_BOUND_INF); if (u == tbi.root && g[e].minBound == 0) { DEBUG_PRINTF("root role with .* edge, no pred needed\n"); continue; /* no pred required */ } - tr.predOffset = verify_u32(predTable.size()); + tr.flags &= ROSE_ROLE_PRED_CLEAR_MASK; - tr.flags |= ROSE_ROLE_PRED_ROOT; - createPred(tbi, bc, e, predTable); + auto &program = bc.rolePrograms[g[v].role]; + makeRoleCheckRootBounds(tbi, v, e, program); continue; } @@ -3287,26 +3574,11 @@ void buildPredTable(const RoseBuildImpl &tbi, build_context &bc, // Collect in-edges, ordered by the state index of the predecessor. vector edges = make_vector_from(in_edges(v, g)); sort(edges.begin(), edges.end(), - EdgeSourceStateCompare(g, bc.roleTable)); + EdgeSourceStateCompare(g, bc.roleStateIndices)); - vector keys; - - // Create preds and collect state indices for our sparse iterator. 
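/* Worked example (hypothetical numbers) for makeRoleCheckRootBounds()
 * above: an edge root->v with minBound=2, maxBound=10 and a shortest
 * literal of 3 bytes yields a CHECK_ROOT_BOUNDS instruction with
 * min_bound=5 and max_bound=13; an infinite maxBound stays ROSE_BOUND_INF
 * rather than being offset. For ROSE_ROLE_HISTORY_ANCH edges, both bounds
 * are additionally shifted by the predecessor's max_offset to make them
 * absolute. */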
for (const auto &e : edges) { createPred(tbi, bc, e, predTable); - RoseVertex u = source(e, g); - assert(g[u].role < bc.roleTable.size()); - u32 stateIdx = bc.roleTable.at(g[u].role).stateIndex; - if (stateIdx != MMB_INVALID) { - keys.push_back(stateIdx); - } } - - vector iter; - mmbBuildSparseIterator(iter, keys, bc.numStates); - assert(!iter.empty()); - - tr.predOffset = addIteratorToTable(bc, iter); } } @@ -3345,9 +3617,8 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, continue; } - assert(lbi.nfa); - assert(lbi.nfa->queueIndex >= leftfixBeginQueue); - u32 left_index = lbi.nfa->queueIndex - leftfixBeginQueue; + assert(lbi.queue >= leftfixBeginQueue); + u32 left_index = lbi.queue - leftfixBeginQueue; assert(left_index < leftfixCount); /* seedy hack to make miracles more effective. @@ -3414,8 +3685,17 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, // Build sparse iterators for literals. static -void buildSparseIter(build_context &bc, vector &literalTable, +void buildSparseIter(RoseBuildImpl &build, build_context &bc, + vector &literalTable, const vector &predTable) { + const RoseGraph &g = build.g; + + // Construct a mapping from role ids to state indices. + ue2::unordered_map role_to_state; + for (const auto &m : bc.roleStateIndices) { + role_to_state.emplace(g[m.first].role, m.second); + } + for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) { RoseLiteral &tl = literalTable[finalId]; @@ -3441,14 +3721,16 @@ void buildSparseIter(build_context &bc, vector &literalTable, u32 p = bc.rolePredecessors.at(r)[0]; assert(p != ROSE_OFFSET_INVALID); RoseIterRole ir = { r, ROSE_OFFSET_INVALID }; - predStates[bc.roleTable[p].stateIndex].push_back(ir); + assert(contains(role_to_state, p)); + predStates[role_to_state.at(p)].push_back(ir); } else { const vector &myPreds = bc.rolePredecessors.at(r); for (u32 pred_entry : myPreds) { u32 p = predTable.at(pred_entry).role; RoseIterRole ir = { r, pred_entry }; assert(p < bc.roleTable.size()); - predStates[bc.roleTable[p].stateIndex].push_back(ir); + assert(contains(role_to_state, p)); + predStates[role_to_state.at(p)].push_back(ir); } } } @@ -3603,20 +3885,19 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.depths = findDepths(*this); // Build NFAs - vector> built_nfas; map suffixes; set no_retrigger_queues; bool mpv_as_outfix; - prepMpv(*this, &built_nfas, &historyRequired, &mpv_as_outfix); + prepMpv(*this, bc, &historyRequired, &mpv_as_outfix); u32 outfixBeginQueue = qif.allocated_count(); - if (!prepOutfixes(*this, &built_nfas, &historyRequired)) { + if (!prepOutfixes(*this, bc, &historyRequired)) { return nullptr; } u32 outfixEndQueue = qif.allocated_count(); u32 leftfixBeginQueue = outfixEndQueue; - if (!buildNfas(*this, qif, &built_nfas, &suffixes, &bc.leftfix_info, - &no_retrigger_queues, &leftfixBeginQueue)) { + if (!buildNfas(*this, bc, qif, &suffixes, &no_retrigger_queues, + &leftfixBeginQueue)) { return nullptr; } buildCountingMiracles(*this, bc); @@ -3630,15 +3911,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 lit_benefits_size = verify_u32(sizeof(lit_benefits) * nonbenefits_base_id); assert(ISALIGNED_16(lit_benefits_size)); - u32 nfas_size = calcNfaSize(built_nfas); - - // Build our other tables - DEBUG_PRINTF("nfas_size %u\n", nfas_size); vector suffixEkeyLists; buildSuffixEkeyLists(*this, bc, qif, suffixes, &suffixEkeyLists); - buildInitialRoleTable(*this, bc); + buildInitialRoleTable(*this, bc, suffixes); 
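/* From here on, bc carries three parallel views of the roles: the RoseRole
 * entries in bc.roleTable, the as-yet-unflattened programs in
 * bc.rolePrograms (indexed by role id), and bc.roleStateIndices for roles
 * that own a state bit. The programs stay mutable for a while longer --
 * pred table construction may still prepend CHECK_ROOT_BOUNDS instructions
 * (see makeRoleCheckRootBounds()) -- before writeRolePrograms() and
 * buildRootRolePrograms() flatten them into the engine blob. */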
DEBUG_PRINTF("roletable %zu\n", bc.roleTable.size()); @@ -3651,11 +3928,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); - buildRoseTriggerLists(*this, bc); - vector literalTable; buildLiteralTable(*this, bc, literalTable); - buildSparseIter(bc, literalTable, predTable); + buildSparseIter(*this, bc, literalTable, predTable); u32 eodIterOffset; u32 eodIterMapOffset; @@ -3673,6 +3948,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } + // Write role programs into the engine blob. + writeRolePrograms(bc); + + // Write root programs for literals into the engine blob. + buildRootRolePrograms(*this, bc, literalTable); + u32 amatcherOffset = 0; u32 fmatcherOffset = 0; u32 ematcherOffset = 0; @@ -3685,13 +3966,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset = sizeof(RoseEngine); } + UNUSED const size_t engineBlobSize = + byte_length(bc.engine_blob); // test later + currOffset = ROUNDUP_CL(currOffset); DEBUG_PRINTF("currOffset %u\n", currOffset); - /* leave space for the nfas */ - u32 base_nfa_offset = currOffset; - currOffset += nfas_size; - /* leave space for the benefits listing */ u32 base_lits_benefits_offset = currOffset; currOffset += lit_benefits_size; @@ -3754,13 +4034,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 nfaInfoLen = sizeof(NfaInfo) * queue_count; currOffset = nfaInfoOffset + nfaInfoLen; - vector rootRoleTable; - buildRootRoleTable(*this, roleOffset, literalTable, &rootRoleTable); - - u32 rootRoleOffset = ROUNDUP_N(currOffset, sizeof(u32)); - u32 rootRoleLen = sizeof(u32) * rootRoleTable.size(); - currOffset = rootRoleOffset + rootRoleLen; - vector art; // Reports raised by anchored roles vector arit; // inverse reportID -> position in art calcAnchoredMatches(*this, art, arit); @@ -3834,23 +4107,19 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { assert(ISALIGNED_CL(ptr)); if (atable) { - assert(amatcherOffset >= base_nfa_offset); assert(amatcherOffset); memcpy(ptr + amatcherOffset, atable.get(), asize); } if (ftable) { assert(fmatcherOffset); - assert(fmatcherOffset >= base_nfa_offset); memcpy(ptr + fmatcherOffset, ftable.get(), fsize); } if (etable) { assert(ematcherOffset); - assert(ematcherOffset >= base_nfa_offset); memcpy(ptr + ematcherOffset, etable.get(), esize); } if (sbtable) { assert(sbmatcherOffset); - assert(sbmatcherOffset >= base_nfa_offset); memcpy(ptr + sbmatcherOffset, sbtable.get(), sbsize); } @@ -3902,8 +4171,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->anchoredReportInverseMapOffset = anchoredReportInverseMapOffset; engine->multidirectOffset = multidirectOffset; - engine->rootRoleCount = verify_u32(rootRoleTable.size()); - engine->rootRoleOffset = rootRoleOffset; engine->eodIterOffset = eodIterOffset; engine->eodIterMapOffset = eodIterMapOffset; @@ -3956,8 +4223,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->maxBiAnchoredWidth = findMaxBAWidth(*this); engine->noFloatingRoots = hasNoFloatingRoots(); engine->hasFloatingDirectReports = floating_direct_report; - engine->requiresEodCheck = hasEodAnchors(*this, built_nfas, - outfixEndQueue); + engine->requiresEodCheck = hasEodAnchors(*this, bc, outfixEndQueue); engine->hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); engine->canExhaust = rm.patternSetCanExhaust(); engine->hasSom = hasSom; @@ 
-3997,19 +4263,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { } NfaInfo *nfa_infos = (NfaInfo *)(ptr + nfaInfoOffset); - populateNfaInfoBasics(nfa_infos, outfixes, rm, suffixes, suffixEkeyLists); - updateNfaState(built_nfas, bc.leftfix_info, &engine->stateOffsets, nfa_infos, + populateNfaInfoBasics(*this, bc, outfixes, suffixes, suffixEkeyLists, + no_retrigger_queues, nfa_infos); + updateNfaState(bc, &engine->stateOffsets, nfa_infos, &engine->scratchStateSize, &engine->nfaStateSize, &engine->tStateSize); - // Copy in the NFAs and update roles - engine->nfaRegionBegin = base_nfa_offset; - engine->nfaRegionEnd = copyInNFAs(*this, &bc.roleTable, built_nfas, - no_retrigger_queues, nfa_infos, - base_nfa_offset, suffixes, ptr); - // We're done with the NFAs. - built_nfas.clear(); - /* do after update mask */ buildLitBenefits(*this, engine.get(), base_lits_benefits_offset); @@ -4024,12 +4283,15 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); copy_bytes(ptr + engine->predOffset, predTable); - copy_bytes(ptr + engine->rootRoleOffset, rootRoleTable); copy_bytes(ptr + engine->anchoredReportMapOffset, art); copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit); copy_bytes(ptr + engine->multidirectOffset, mdr_reports); copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); + // Safety check: we shouldn't have written anything to the engine blob + // after we copied it into the engine bytecode. + assert(byte_length(bc.engine_blob) == engineBlobSize); + DEBUG_PRINTF("rose done %p\n", engine.get()); return engine; } diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index fd507a119..2a31a65a9 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -2154,53 +2154,6 @@ bool hasOrphanedTops(const RoseBuildImpl &tbi) { #endif // NDEBUG -/** - * \brief Normalise vertices so that every one has <= 1 report. - */ -static -void normaliseRoles(RoseBuildImpl &build) { - DEBUG_PRINTF("normalising\n"); - RoseGraph &g = build.g; - - vector work; // Vertices with > 1 report. - - for (const auto &v : vertices_range(g)) { - if (g[v].reports.size() > 1) { - work.push_back(v); - } - } - - DEBUG_PRINTF("%zu vertices to normalise\n", work.size()); - - for (const auto &v : work) { - DEBUG_PRINTF("exploding vertex %zu with %zu reports\n", g[v].idx, - g[v].reports.size()); - - // Make a copy of v for the trailing N-1 reports. Each of those gets - // one report and a copy of the in-edges. The first vertex retains the - // out-edges and suffix, if any are present. All the others don't need - // them. - - const auto &reports = g[v].reports; - - for (auto it = next(begin(reports)); it != end(reports); ++it) { - const ReportID &r = *it; - RoseVertex v2 = build.cloneVertex(v); - g[v2].reports = {r}; - - for (const auto &e : in_edges_range(v, g)) { - add_edge(source(e, g), v2, g[e], g); - } - - // No out-edges or suffix. - g[v2].suffix.reset(); - } - - // Vertex v retains the first report. - g[v].reports = {*begin(reports)}; - } -} - aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { dumpRoseGraph(*this, nullptr, "rose_early.dot"); @@ -2315,10 +2268,6 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { dumpRoseGraph(*this, nullptr, "rose_pre_norm.dot"); - // Ensure that every vertex has <= 1 report, since the Rose runtime - // requires this at present. 
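/* normaliseRoles() can be retired because role programs no longer assume a
 * single report per vertex: makeRoleReports() emits one report instruction
 * per entry in g[v].reports, so multi-report vertices no longer need to be
 * exploded into clones. */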
- normaliseRoles(*this); - return buildFinalEngine(minWidth); } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index d4918e4ff..d69d28d62 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -34,6 +34,7 @@ #include "rose_build_impl.h" #include "rose/rose_dump.h" #include "rose_internal.h" +#include "rose_program.h" #include "ue2common.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_dump.h" @@ -95,6 +96,59 @@ const RoseRole *getRoseRole(const RoseBuildImpl &build, return &roles[role_idx]; } +#define SKIP_CASE(name) \ + case ROSE_ROLE_INSTR_##name: { \ + const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + +template +const Struct * +findInstruction(const RoseEngine *t, const RoseRole *role) { + if (!role->programOffset) { + return nullptr; + } + + const char *pc = (const char *)t + role->programOffset; + for (;;) { + u8 code = *(const u8 *)pc; + assert(code <= ROSE_ROLE_INSTR_END); + if (code == Opcode) { + return (const Struct *)pc; + } + // Skip to the next instruction. + switch (code) { + SKIP_CASE(ANCHORED_DELAY) + SKIP_CASE(CHECK_ONLY_EOD) + SKIP_CASE(CHECK_ROOT_BOUNDS) + SKIP_CASE(CHECK_LEFTFIX) + SKIP_CASE(CHECK_LOOKAROUND) + SKIP_CASE(SOM_ADJUST) + SKIP_CASE(SOM_LEFTFIX) + SKIP_CASE(TRIGGER_INFIX) + SKIP_CASE(TRIGGER_SUFFIX) + SKIP_CASE(REPORT) + SKIP_CASE(REPORT_CHAIN) + SKIP_CASE(REPORT_EOD) + SKIP_CASE(REPORT_SOM_INT) + SKIP_CASE(REPORT_SOM) + SKIP_CASE(REPORT_SOM_KNOWN) + SKIP_CASE(SET_STATE) + SKIP_CASE(SET_GROUPS) + case ROSE_ROLE_INSTR_END: + return nullptr; + default: + assert(0); + return nullptr; + } + } + + return nullptr; +} + +#undef SKIP_CASE + namespace { class RoseGraphWriter { @@ -149,9 +203,12 @@ class RoseGraphWriter { if (g[v].suffix) { os << "\\nSUFFIX (TOP " << g[v].suffix.top; if (r) { - assert(t); - const NFA *n = (const NFA *)((const char *)t + r->suffixOffset); - os << ", Q" << n->queueIndex; + const auto *ri = + findInstruction(t, r); + if (ri) { + os << ", Q" << ri->queue; + } } else { // Can't dump the queue number, but we can identify the suffix. 
if (g[v].suffix.graph) { @@ -191,7 +248,12 @@ class RoseGraphWriter { os << "\\nROSE " << roseKind; os << " ("; if (r) { - os << "Q" << r->leftfixQueue << ", "; + const auto *ri = + findInstruction(t, r); + if (ri) { + os << "Q" << ri->queue << ", "; + } } os << "report " << g[v].left.leftfix_report << ")"; @@ -555,19 +617,28 @@ void dumpRoseLookaround(const RoseBuildImpl &build, const RoseEngine *t, for (RoseVertex v : vertices_range(g)) { const RoseRole *role = getRoseRole(build, t, v); - if (!role || role->lookaroundIndex == MO_INVALID_IDX) { + if (!role) { continue; } + const auto *ri = + findInstruction(t, role); + if (!ri) { + continue; + } + + const u32 look_idx = ri->index; + const u32 look_count = ri->count; + os << "Role " << g[v].role << endl; os << " literals: " << as_string_list(g[v].literals) << endl; - os << " lookaround: index=" << role->lookaroundIndex - << ", count=" << role->lookaroundCount << endl; + os << " lookaround: index=" << look_idx << ", count=" << look_count + << endl; - const s8 *look = look_base + role->lookaroundIndex; - const s8 *look_end = look + role->lookaroundCount; - const u8 *reach = - reach_base + role->lookaroundIndex * REACH_BITVECTOR_LEN; + const s8 *look = look_base + look_idx; + const s8 *look_end = look + look_count; + const u8 *reach = reach_base + look_idx * REACH_BITVECTOR_LEN; for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) { os << " " << std::setw(4) << std::setfill(' ') << int{*look} diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 39596d8fd..b2604ff04 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -305,6 +305,11 @@ struct OutfixInfo { /* TODO: poly */ u32 get_queue(QueueIndexFactory &qif); + u32 get_queue() const { + assert(queue != ~0U); + return queue; + } + bool is_nonempty_mpv() const { return !puffettes.empty() || !triggered_puffettes.empty(); } @@ -329,9 +334,6 @@ struct OutfixInfo { /* TODO: poly */ std::vector puffettes; std::vector triggered_puffettes; - /** Once the outfix has been built into an engine, this will point to it. 
*/ - NFA *nfa = nullptr; - RevAccInfo rev_info; u32 maxBAWidth = 0; //!< max bi-anchored width depth minWidth = depth::infinity(); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index e42e0aca6..e89a17728 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -2572,10 +2572,6 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { assert(!it->is_dead()); - if (it->nfa) { - assert(!it->rdfa && !it->holder && !it->haig); - continue; - } assert(!it->chained); if (it->rdfa) { dfas.push_back(it->rdfa.get()); @@ -2650,10 +2646,6 @@ void mergeOutfixes(RoseBuildImpl &tbi) { vector som_dfas; for (const auto &outfix : tbi.outfixes) { - if (outfix.nfa) { - assert(!outfix.rdfa && !outfix.holder && !outfix.haig); - continue; - } assert(!outfix.chained); if (outfix.rdfa) { dfas.push_back(outfix.rdfa.get()); diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 8fbef8891..109c2d26c 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -629,8 +629,6 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) } for (const auto &outfix : tbi.outfixes) { - assert(!outfix.nfa); /* should not be built yet */ - for (const auto &report_id : all_reports(outfix)) { outfix_map[report_id].insert(&outfix); } @@ -738,7 +736,6 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport( for (const auto &outfix_ptr : outfixes) { assert(outfix_ptr); const OutfixInfo &out = *outfix_ptr; - assert(!out.nfa); /* should not be built yet */ if (has_outfix || has_role || has_suffix) { return true; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index b9c0c05be..6ec890642 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -34,6 +34,7 @@ #include "rose_dump.h" #include "rose_common.h" #include "rose_internal.h" +#include "rose_program.h" #include "hs_compile.h" #include "ue2common.h" #include "nfa/nfa_build_util.h" @@ -202,47 +203,240 @@ u32 rolesWithFlag(const RoseEngine *t, u32 flag) { return n; } +#define HANDLE_CASE(name) \ + case ROSE_ROLE_INSTR_##name: { \ + const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + static -u32 rolesWithSuffixes(const RoseEngine *t) { +u32 rolesWithInstr(const RoseEngine *t, + enum RoseRoleInstructionCode find_code) { u32 n = 0; const RoseRole *tr = getRoleTable(t); const RoseRole *tr_end = tr + t->roleCount; for (; tr != tr_end; ++tr) { - if (tr->suffixOffset) { - n++; + if (!tr->programOffset) { + continue; } + + const char *pc = (const char *)t + tr->programOffset; + for (;;) { + u8 code = *(const u8 *)pc; + assert(code <= ROSE_ROLE_INSTR_END); + if (code == find_code) { + n++; + goto next_role; + } + switch (code) { + HANDLE_CASE(CHECK_ONLY_EOD) + HANDLE_CASE(CHECK_ROOT_BOUNDS) + HANDLE_CASE(CHECK_LOOKAROUND) + HANDLE_CASE(CHECK_LEFTFIX) + HANDLE_CASE(ANCHORED_DELAY) + HANDLE_CASE(SOM_ADJUST) + HANDLE_CASE(SOM_LEFTFIX) + HANDLE_CASE(TRIGGER_INFIX) + HANDLE_CASE(TRIGGER_SUFFIX) + HANDLE_CASE(REPORT) + HANDLE_CASE(REPORT_CHAIN) + HANDLE_CASE(REPORT_EOD) + HANDLE_CASE(REPORT_SOM_INT) + HANDLE_CASE(REPORT_SOM) + HANDLE_CASE(REPORT_SOM_KNOWN) + HANDLE_CASE(SET_STATE) + HANDLE_CASE(SET_GROUPS) + case ROSE_ROLE_INSTR_END: + goto next_role; + default: + assert(0); + return 0; + } + } + next_role:; } return n; } +#undef HANDLE_CASE + +#define PROGRAM_CASE(name) \ + case ROSE_ROLE_INSTR_##name: { \ + os << 
" " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ + << ": " #name " (" << (int)ROSE_ROLE_INSTR_##name << ")" << endl; \ + const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + static -u32 rolesWithLookaround(const RoseEngine *t) { - u32 n = 0; - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; +void dumpRoleProgram(ofstream &os, const char *pc) { + const char *pc_base = pc; + for (;;) { + u8 code = *(const u8 *)pc; + assert(code <= ROSE_ROLE_INSTR_END); + switch (code) { + PROGRAM_CASE(ANCHORED_DELAY) { + os << " depth " << u32{ri->depth} << endl; + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + os << " done_jump +" << ri->done_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION - for (; tr != tr_end; ++tr) { - if (tr->lookaroundIndex != MO_INVALID_IDX) { - n++; + PROGRAM_CASE(CHECK_ONLY_EOD) { + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ROOT_BOUNDS) { + os << " min_bound " << ri->min_bound << endl; + os << " max_bound " << ri->max_bound << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LOOKAROUND) { + os << " index " << ri->index << endl; + os << " count " << ri->count << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LEFTFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + os << " report " << ri->report << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ADJUST) { + os << " distance " << ri->distance << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_LEFTFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_INFIX) { + os << " queue " << ri->queue << endl; + os << " event " << ri->event << endl; + os << " cancel " << u32{ri->cancel} << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_SUFFIX) { + os << " queue " << ri->queue << endl; + os << " event " << ri->event << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_CHAIN) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EOD) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_INT) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_KNOWN) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_STATE) { + os << " depth " << u32{ri->depth} << endl; + os << " index " << ri->index << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(END) { return; } + PROGRAM_NEXT_INSTRUCTION + + default: + os << " UNKNOWN (code " << int{code} << ")" << endl; + os << " " << endl; + return; } } - return n; } -// Count roles that fire reports +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION + static -u32 rolesWithReports(const RoseEngine *t) { - u32 n = 0; - const RoseRole 
*tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; +void dumpRoseRolePrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); - for (; tr != tr_end; ++tr) { - if (tr->reportId != MO_INVALID_IDX) { - n++; + const RoseRole *roles = getRoleTable(t); + const char *base = (const char *)t; + + for (u32 i = 0; i < t->roleCount; i++) { + const RoseRole *role = &roles[i]; + os << "Role " << i << endl; + + if (!role->programOffset) { + os << " " << endl; + continue; } + + dumpRoleProgram(os, base + role->programOffset); + os << endl; } - return n; + + os.close(); +} + +static +void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const RoseLiteral *lits = getLiteralTable(t); + const char *base = (const char *)t; + + for (u32 i = 0; i < t->literalCount; i++) { + const RoseLiteral *lit = &lits[i]; + if (!lit->rootProgramOffset) { + continue; + } + + os << "Literal " << i << endl; + dumpRoleProgram(os, base + lit->rootProgramOffset); + os << endl; + } + + os.close(); } static @@ -279,16 +473,6 @@ void dumpPreds(FILE *f, const RoseEngine *t) { } } -static -const char *startNfaRegion(const RoseEngine *t) { - return (const char *)t + t->nfaRegionBegin; -} - -static -const char *endNfaRegion(const RoseEngine *t) { - return (const char *)t + t->nfaRegionEnd; -} - static void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) { const u32 qindex = n->queueIndex; @@ -353,18 +537,15 @@ void dumpComponentInfo(const RoseEngine *t, const string &base) { ss << base << "rose_components.txt"; ofstream fout(ss.str().c_str()); - const char *p = startNfaRegion(t); - const char *pe = endNfaRegion(t); - fout << "Index Offset\tEngine \tStates S.State Bytes Notes\n"; - while (p < pe) { - const NFA *n = (const NFA *)p; - u32 i = n->queueIndex; + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); fout << left << setw(6) << i << " "; - fout << left << (p - (const char *)t) << "\t"; /* offset */ + fout << left << ((const char *)n - (const char *)t) << "\t"; /* offset */ fout << left << setw(16) << describe(*n) << "\t"; @@ -375,8 +556,6 @@ void dumpComponentInfo(const RoseEngine *t, const string &base) { dumpNfaNotes(fout, t, n); fout << endl; - - p += ROUNDUP_CL(n->length); } } @@ -416,20 +595,17 @@ void dumpExhaust(const RoseEngine *t, const string &base) { static void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { - const char *p = startNfaRegion(t); - const char *pe = endNfaRegion(t); - dumpExhaust(t, base); - while (p < pe) { - const NFA *n = (const NFA *)p; - u32 q = n->queueIndex; + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); stringstream sstxt, ssdot, ssraw; - sstxt << base << "rose_nfa_" << q << ".txt"; - ssdot << base << "rose_nfa_" << q << ".dot"; - ssraw << base << "rose_nfa_" << q << ".raw"; + sstxt << base << "rose_nfa_" << i << ".txt"; + ssdot << base << "rose_nfa_" << i << ".dot"; + ssraw << base << "rose_nfa_" << i << ".raw"; FILE *f; @@ -446,8 +622,6 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { fwrite(n, 1, n->length, f); fclose(f); } - - p += ROUNDUP_CL(n->length); } } @@ -638,9 +812,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, " - role state table : %zu bytes\n", t->rolesWithStateCount * sizeof(u32)); fprintf(f, " - nfa info table : %u bytes\n", - 
t->rootRoleOffset - t->nfaInfoOffset); - fprintf(f, " - root role table : %zu bytes\n", - t->rootRoleCount * sizeof(u32)); + t->anchoredReportMapOffset - t->nfaInfoOffset); fprintf(f, " - lookaround table : %u bytes\n", t->predOffset - t->lookaroundTableOffset); fprintf(f, " - lookaround reach : %u bytes\n", @@ -686,24 +858,23 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "number of roles : %u\n", t->roleCount); fprintf(f, " - with state index : %u\n", t->rolesWithStateCount); fprintf(f, " - with leftfix nfa : %u\n", - rolesWithFlag(t, ROSE_ROLE_FLAG_ROSE)); - fprintf(f, " - with suffix nfa : %u\n", rolesWithSuffixes(t)); - fprintf(f, " - with lookaround : %u\n", rolesWithLookaround(t)); - fprintf(f, " - with reports : %u\n", rolesWithReports(t)); + rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LEFTFIX)); + fprintf(f, " - with suffix nfa : %u\n", + rolesWithInstr(t, ROSE_ROLE_INSTR_TRIGGER_SUFFIX)); + fprintf(f, " - with lookaround : %u\n", + rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LOOKAROUND)); + fprintf(f, " - with reports : %u\n", + rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT)); fprintf(f, " - with som reports : %u\n", - rolesWithFlag(t, ROSE_ROLE_FLAG_SOM_REPORT)); - fprintf(f, " - with eod accepts : %u\n", - rolesWithFlag(t, ROSE_ROLE_FLAG_ACCEPT_EOD)); + rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT_SOM_INT)); fprintf(f, " - match only at end : %u\n", - rolesWithFlag(t, ROSE_ROLE_FLAG_ONLY_AT_END)); + rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ONLY_EOD)); fprintf(f, " + anchored : %u\n", t->anchoredMatches); - fprintf(f, " - no preds (root) : %u\n", - rolesWithFlag(t, ROSE_ROLE_PRED_NONE)); fprintf(f, " - simple preds : %u\n", rolesWithFlag(t, ROSE_ROLE_PRED_SIMPLE)); - fprintf(f, " - root preds : %u\n", - rolesWithFlag(t, ROSE_ROLE_PRED_ROOT)); + fprintf(f, " - bound root preds : %u\n", + rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS)); fprintf(f, " - 'any' preds : %u\n", rolesWithFlag(t, ROSE_ROLE_PRED_ANY)); fprintf(f, "number of preds : %u\n", t->predCount); @@ -810,8 +981,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, roleCount); DUMP_U32(t, predOffset); DUMP_U32(t, predCount); - DUMP_U32(t, rootRoleOffset); - DUMP_U32(t, rootRoleCount); DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); DUMP_U32(t, lookaroundTableOffset); @@ -872,8 +1041,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, literalBenefitsOffsets); DUMP_U32(t, somRevCount); DUMP_U32(t, somRevOffsetOffset); - DUMP_U32(t, nfaRegionBegin); - DUMP_U32(t, nfaRegionEnd); DUMP_U32(t, group_weak_end); DUMP_U32(t, floatingStreamState); DUMP_U32(t, eodLiteralId); @@ -912,19 +1079,7 @@ void roseDumpRoleStructRaw(const RoseEngine *t, FILE *f) { for (const RoseRole *p = tr; p < tr_end; p++) { fprintf(f, "role[%zu] = {\n", p - tr); DUMP_U32(p, flags); - DUMP_U32(p, predOffset); - DUMP_U64(p, groups); - DUMP_U32(p, reportId); - DUMP_U32(p, stateIndex); - DUMP_U32(p, suffixEvent); - DUMP_U8(p, depth); - DUMP_U32(p, suffixOffset); - DUMP_U32(p, leftfixReport); - DUMP_U32(p, leftfixLag); - DUMP_U32(p, leftfixQueue); - DUMP_U32(p, somAdjust); - DUMP_U32(p, lookaroundIndex); - DUMP_U32(p, lookaroundCount); + DUMP_U32(p, programOffset); fprintf(f, "}\n"); } } @@ -935,6 +1090,10 @@ void roseDumpComponents(const RoseEngine *t, bool dump_raw, const string &base) dumpAnchored(t, base); dumpRevComponentInfo(t, base); dumpRevNfas(t, dump_raw, base); + + // Role programs. 
+ dumpRoseRolePrograms(t, base + "/rose_role_programs.txt"); + dumpRoseLitPrograms(t, base + "/rose_lit_root_programs.txt"); } void roseDumpInternals(const RoseEngine *t, const string &base) { diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 335d2b2cb..00e62eb96 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -75,13 +75,7 @@ ReportID literalToReport(u32 id) { // Structure representing a literal. Each literal may have many roles. struct RoseLiteral { - u32 rootRoleOffset; /**< If rootRoleCount == 1, this is an offset relative - * to the rose engine to the root role associated with - * the literal. - * If rootRoleCount > 1, this is the first index into - * the rootRoleTable indicating the root roles. - */ - u32 rootRoleCount; // number of root roles + u32 rootProgramOffset; // role program to run for root roles. u32 iterOffset; // offset of sparse iterator, relative to rose u32 iterMapOffset; // offset of the iter mapping table, relative to rose rose_group groups; // bitset of groups that cause this literal to fire. @@ -216,13 +210,6 @@ struct LeftNfaInfo { rose_group squash_mask; /* & mask applied when rose nfa dies */ }; -// A list of these is used to trigger prefix/infix roses. -struct RoseTrigger { - u32 queue; // queue index of leftfix - u32 event; // queue event, from MQE_* - u8 cancel_prev_top; -}; - struct NfaInfo { u32 nfaOffset; u32 stateOffset; @@ -238,42 +225,14 @@ struct NfaInfo { * matches */ }; -#define ROSE_ROLE_FLAG_ANCHOR_TABLE (1U << 0) /**< role is triggered from - * anchored table */ -#define ROSE_ROLE_FLAG_ACCEPT_EOD (1U << 2) /**< "fake" role, fires callback - * at EOD */ -#define ROSE_ROLE_FLAG_ONLY_AT_END (1U << 3) /**< role can only be switched on - * at end of block */ -#define ROSE_ROLE_FLAG_PRED_OF_EOD (1U << 4) /**< eod is a successor literal - * of the role */ -#define ROSE_ROLE_FLAG_EOD_TABLE (1U << 5) /**< role is triggered from eod - * table */ -#define ROSE_ROLE_FLAG_ROSE (1U << 6) /**< rose style prefix nfa for - * role */ -#define ROSE_ROLE_FLAG_SOM_REPORT (1U << 7) /**< report id is only used to - * manipulate som */ -#define ROSE_ROLE_FLAG_REPORT_START (1U << 8) /**< som som som som */ -#define ROSE_ROLE_FLAG_CHAIN_REPORT (1U << 9) /**< report id is only used to - * start an outfix engine */ -#define ROSE_ROLE_FLAG_SOM_ADJUST (1U << 10) /**< som value to use is offset - * from match end location */ -#define ROSE_ROLE_FLAG_SOM_ROSEFIX (1U << 11) /**< som value to use is provided - * by prefix/infix */ - /* We allow different types of role-predecessor relationships. These are stored * in with the flags */ -#define ROSE_ROLE_PRED_NONE (1U << 20) /**< the only pred is the root, - * [0, inf] bounds */ #define ROSE_ROLE_PRED_SIMPLE (1U << 21) /**< single [0,inf] pred, no * offset tracking */ -#define ROSE_ROLE_PRED_ROOT (1U << 22) /**< pred is root or anchored - * root, and we have bounds */ #define ROSE_ROLE_PRED_ANY (1U << 23) /**< any of our preds can match */ -#define ROSE_ROLE_PRED_CLEAR_MASK (~(ROSE_ROLE_PRED_NONE \ - | ROSE_ROLE_PRED_SIMPLE \ - | ROSE_ROLE_PRED_ROOT \ - | ROSE_ROLE_PRED_ANY)) +#define ROSE_ROLE_PRED_CLEAR_MASK \ + (~(ROSE_ROLE_PRED_SIMPLE | ROSE_ROLE_PRED_ANY)) #define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one * whole byte (OWB) (streaming only). Other @@ -285,28 +244,7 @@ struct NfaInfo { // Structure representing a literal role. 
struct RoseRole { u32 flags; - u32 predOffset; // either offset of pred sparse iterator, or - // (for ROSE_ROLE_PRED_ROOT) index of single RosePred. - rose_group groups; /**< groups to enable when role is set (groups of succ - * literals) */ - ReportID reportId; // report ID, or MO_INVALID_IDX - u32 stateIndex; /**< index into state multibit, or MMB_INVALID. Roles do not - * require a state bit if they are terminal */ - u32 suffixEvent; // queue event, from MQE_ - u8 depth; /**< depth of this vertex from root in the tree, or 255 if greater. - */ - u32 suffixOffset; /**< suffix nfa: 0 if no suffix associated with the role, - * relative to base of the rose. */ - ReportID leftfixReport; // (pre|in)fix report to check, or MO_INVALID_IDX. - u32 leftfixLag; /**< distance behind match where we need to check the - * leftfix engine status */ - u32 leftfixQueue; /**< queue index of the prefix/infix before role */ - u32 infixTriggerOffset; /* offset to list of infix roses to trigger */ - u32 somAdjust; /**< som for the role is offset from end match offset */ - - u32 lookaroundIndex; /**< index of lookaround offset/reach in table, or - * MO_INVALID_IDX. */ - u32 lookaroundCount; /**< number of lookaround entries. */ + u32 programOffset; /**< offset to program to run. */ }; // Structure representing a predecessor relationship @@ -513,8 +451,6 @@ struct RoseEngine { u32 roleCount; // number of RoseRole entries u32 predOffset; // offset of RosePred array (bytes) u32 predCount; // number of RosePred entries - u32 rootRoleOffset; - u32 rootRoleCount; u32 leftOffset; u32 roseCount; @@ -584,8 +520,6 @@ struct RoseEngine { id */ u32 somRevCount; /**< number of som reverse nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ - u32 nfaRegionBegin; /* start of the nfa region, debugging only */ - u32 nfaRegionEnd; /* end of the nfa region, debugging only */ u32 group_weak_end; /* end of weak groups, debugging only */ u32 floatingStreamState; // size in bytes u32 eodLiteralId; // literal ID for eod ROSE_EVENT if used, otherwise 0. @@ -715,13 +649,6 @@ const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) { return it; } -static really_inline -const u32 *getRootRoleTable(const struct RoseEngine *t) { - const u32 *r = (const u32 *)((const char *)t + t->rootRoleOffset); - assert(ISALIGNED_N(r, 4)); - return r; -} - static really_inline const struct lit_benefits *getLiteralBenefitsTable( const struct RoseEngine *t) { diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h new file mode 100644 index 000000000..40f013ca2 --- /dev/null +++ b/src/rose/rose_program.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Rose data structures to do with role programs. + */ + +#ifndef ROSE_ROSE_PROGRAM_H +#define ROSE_ROSE_PROGRAM_H + +#include "rose_internal.h" +#include "ue2common.h" + +/** \brief Minimum alignment for each instruction in memory. */ +#define ROSE_INSTR_MIN_ALIGN 8U + +/** \brief Role program instruction opcodes. */ +enum RoseRoleInstructionCode { + ROSE_ROLE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + ROSE_ROLE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. + ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS, //!< Bounds on distance from root. + ROSE_ROLE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. + ROSE_ROLE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. + ROSE_ROLE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. + ROSE_ROLE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. + ROSE_ROLE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. + ROSE_ROLE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. + ROSE_ROLE_INSTR_REPORT, //!< Fire an ordinary report. + ROSE_ROLE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). + ROSE_ROLE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time. + ROSE_ROLE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. + ROSE_ROLE_INSTR_REPORT_SOM, //!< Manipulate SOM and report. + ROSE_ROLE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. + ROSE_ROLE_INSTR_SET_STATE, //!< Switch a state index on. + ROSE_ROLE_INSTR_SET_GROUPS, //!< Set some literal group bits. + ROSE_ROLE_INSTR_END //!< End of program. +}; + +struct ROSE_ROLE_STRUCT_ANCHORED_DELAY { + u8 code; //!< From enum RoseRoleInstructionCode. + u8 depth; //!< Depth for this state. + rose_group groups; //!< Bitmask. + u32 done_jump; //!< Jump forward this many bytes if successful. +}; + +struct ROSE_ROLE_STRUCT_CHECK_ONLY_EOD { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 min_bound; //!< Min distance from zero. + u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF). + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_ROLE_STRUCT_CHECK_LOOKAROUND { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 index; + u32 count; + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 queue; //!< Queue of leftfix to check. + u32 lag; //!< Lag of leftfix for this case. + ReportID report; //!< ReportID of leftfix to check. + u32 fail_jump; //!< Jump forward this many bytes on failure. 
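+    /* Note: every *_jump field in this instruction model is a forward byte
+     * offset relative to the start of the current instruction; the
+     * interpreter simply advances with pc += fail_jump. Instructions occupy
+     * ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN)-byte slots, so (assuming
+     * the usual 4-byte u32 layout) this struct fills a 24-byte slot and a
+     * fail_jump landing on the immediately following instruction is 24. */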
+}; + +struct ROSE_ROLE_STRUCT_SOM_ADJUST { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 distance; //!< Distance to EOM. +}; + +struct ROSE_ROLE_STRUCT_SOM_LEFTFIX { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 queue; //!< Queue index of leftfix providing SOM. + u32 lag; //!< Lag of leftfix for this case. +}; + +struct ROSE_ROLE_STRUCT_TRIGGER_INFIX { + u8 code; //!< From enum RoseRoleInstructionCode. + u8 cancel; //!< Cancels previous top event. + u32 queue; //!< Queue index of infix. + u32 event; //!< Queue event, from MQE_*. +}; + +struct ROSE_ROLE_STRUCT_TRIGGER_SUFFIX { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 queue; //!< Queue index of suffix. + u32 event; //!< Queue event, from MQE_*. +}; + +struct ROSE_ROLE_STRUCT_REPORT { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_CHAIN { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_EOD { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_SOM_INT { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_SOM { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_SET_STATE { + u8 code; //!< From enum RoseRoleInstructionCode. + u8 depth; //!< Depth for this state. + u32 index; //!< State index in multibit. +}; + +struct ROSE_ROLE_STRUCT_SET_GROUPS { + u8 code; //!< From enum RoseRoleInstructionCode. + rose_group groups; //!< Bitmask. +}; + +struct ROSE_ROLE_STRUCT_END { + u8 code; //!< From enum RoseRoleInstructionCode. +}; + +#endif // ROSE_ROSE_PROGRAM_H diff --git a/src/util/container.h b/src/util/container.h index 62e841c14..63e27743f 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -100,8 +100,9 @@ std::set assoc_keys(const C &container) { /** * \brief Return the length in bytes of the given vector of (POD) objects. */ -template -typename std::vector::size_type byte_length(const std::vector &vec) { +template +typename std::vector::size_type +byte_length(const std::vector &vec) { static_assert(std::is_pod::value, "should be pod"); return vec.size() * sizeof(T); } @@ -110,8 +111,8 @@ typename std::vector::size_type byte_length(const std::vector &vec) { * \brief Copy the given vector of POD objects to the given location in memory. * It is safe to give this function an empty vector. 
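 *
 * A minimal usage sketch (hypothetical buffer and values):
 *
 *     std::vector<u32> v{1, 2, 3};
 *     char buf[12];
 *     copy_bytes(buf, v); // copies byte_length(v) == 12 bytes into buf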
*/ -template -void *copy_bytes(void *dest, const std::vector &vec) { +template +void *copy_bytes(void *dest, const std::vector &vec) { static_assert(std::is_pod::value, "should be pod"); assert(dest); From d67c7583eac87a4ca98f3ef01dcbbba3a66766d6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 4 Dec 2015 16:17:28 +1100 Subject: [PATCH 008/218] rose: Extend the interpreter to handle more work - Use program for EOD sparse iterator - Use program for literal sparse iterator - Eliminate RoseRole, RosePred, RoseVertexProps::role - Small performance optimizations --- src/rose/block.c | 4 +- src/rose/eod.c | 58 +- src/rose/init.c | 4 +- src/rose/match.c | 101 ++- src/rose/match.h | 24 - src/rose/rose_build_bytecode.cpp | 1083 +++++++++++++----------------- src/rose/rose_build_dump.cpp | 206 +----- src/rose/rose_build_misc.cpp | 6 +- src/rose/rose_dump.cpp | 376 ++++------- src/rose/rose_graph.h | 13 +- src/rose/rose_internal.h | 132 ++-- src/rose/rose_program.h | 85 +-- src/rose/runtime.h | 9 - src/scratch.c | 8 +- src/scratch.h | 2 +- 15 files changed, 759 insertions(+), 1352 deletions(-) diff --git a/src/rose/block.c b/src/rose/block.c index ae7d5545c..cfcb83416 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -82,8 +82,8 @@ void init_state_for_block(const struct RoseEngine *t, u8 *state) { assert(t); assert(state); - DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n", - t, t->roleCount, t->rolesWithStateCount); + DEBUG_PRINTF("init for Rose %p with %u state indices\n", t, + t->rolesWithStateCount); // Rose is guaranteed 8-aligned state assert(ISALIGNED_N(state, 8)); diff --git a/src/rose/eod.c b/src/rose/eod.c index 60bf2ea2c..dec07b546 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -113,12 +113,11 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset, return MO_CONTINUE_MATCHING; } - const struct RoseRole *roleTable = getRoleTable(t); - const struct RosePred *predTable = getPredTable(t); - const struct RoseIterMapping *iterMapBase - = getByOffset(t, t->eodIterMapOffset); + DEBUG_PRINTF("running eod iterator at offset %u\n", t->eodIterOffset); + + const u32 *programTable = getByOffset(t, t->eodProgramTableOffset); const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset); - assert(ISALIGNED(iterMapBase)); + assert(ISALIGNED(programTable)); assert(ISALIGNED(it)); // Sparse iterator state was allocated earlier @@ -133,50 +132,17 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset, fatbit_clear(handled_roles); + int work_done = 0; // not read from in this path. + for (; i != MMB_INVALID; i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx); - const struct RoseIterMapping *iterMap = iterMapBase + idx; - const struct RoseIterRole *roles = getByOffset(t, iterMap->offset); - assert(ISALIGNED(roles)); - - DEBUG_PRINTF("%u roles to consider\n", iterMap->count); - for (u32 j = 0; j != iterMap->count; j++) { - u32 role = roles[j].role; - assert(role < t->roleCount); - DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred); - const struct RoseRole *tr = roleTable + role; - - if (fatbit_isset(handled_roles, t->roleCount, role)) { - DEBUG_PRINTF("role %u already handled by the walk, skip\n", - role); - continue; - } - - // Special case: if this role is a trivial case (pred type simple) - // we don't need to check any history and we already know the pred - // role is on. 
- if (tr->flags & ROSE_ROLE_PRED_SIMPLE) { - DEBUG_PRINTF("pred type is simple, no need for checks\n"); - } else { - assert(roles[j].pred < t->predCount); - const struct RosePred *tp = predTable + roles[j].pred; - if (!roseCheckPredHistory(tp, offset)) { - continue; - } - } - - /* mark role as handled so we don't touch it again in this walk */ - fatbit_set(handled_roles, t->roleCount, role); - - u64a som = 0; - int work_done = 0; - hwlmcb_rv_t rv = - roseRunRoleProgram(t, tr->programOffset, offset, &som, - &(scratch->tctxt), &work_done); - if (rv == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } + u32 programOffset = programTable[idx]; + u64a som = 0; + if (roseRunRoleProgram(t, programOffset, offset, &som, + &(scratch->tctxt), + &work_done) == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; } } diff --git a/src/rose/init.c b/src/rose/init.c index c2eccd400..d2f85f2c5 100644 --- a/src/rose/init.c +++ b/src/rose/init.c @@ -80,8 +80,8 @@ void roseInitState(const struct RoseEngine *t, u8 *state) { assert(t); assert(state); - DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n", - t, t->roleCount, t->rolesWithStateCount); + DEBUG_PRINTF("init for Rose %p with %u state indices)\n", t, + t->rolesWithStateCount); // Rose is guaranteed 8-aligned state assert(ISALIGNED_N(state, 8)); diff --git a/src/rose/match.c b/src/rose/match.c index ac9958668..591abcfb3 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -1107,10 +1107,10 @@ char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { } #define PROGRAM_CASE(name) \ - case ROSE_ROLE_INSTR_##name: { \ - DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_ROLE_INSTR_##name); \ - const struct ROSE_ROLE_STRUCT_##name *ri = \ - (const struct ROSE_ROLE_STRUCT_##name *)pc; + case ROSE_INSTR_##name: { \ + DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \ + const struct ROSE_STRUCT_##name *ri = \ + (const struct ROSE_STRUCT_##name *)pc; #define PROGRAM_NEXT_INSTRUCTION \ pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ @@ -1121,26 +1121,28 @@ static really_inline hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, u64a end, u64a *som, struct RoseContext *tctxt, char in_anchored, int *work_done) { - assert(programOffset); - DEBUG_PRINTF("program begins at offset %u\n", programOffset); + assert(programOffset); + assert(programOffset < t->size); + const char *pc = getByOffset(t, programOffset); - assert(*(const u8 *)pc != ROSE_ROLE_INSTR_END); + assert(*(const u8 *)pc != ROSE_INSTR_END); for (;;) { assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); u8 code = *(const u8 *)pc; - assert(code <= ROSE_ROLE_INSTR_END); + assert(code <= ROSE_INSTR_END); - switch ((enum RoseRoleInstructionCode)code) { + switch ((enum RoseInstructionCode)code) { PROGRAM_CASE(ANCHORED_DELAY) { if (in_anchored && end > t->floatingMinLiteralMatchOffset) { DEBUG_PRINTF("delay until playback\n"); update_depth(tctxt, ri->depth); tctxt->groups |= ri->groups; *work_done = 1; + assert(ri->done_jump); // must progress pc += ri->done_jump; continue; } @@ -1151,16 +1153,29 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, struct core_info *ci = &tctxtToScratch(tctxt)->core_info; if (end != ci->buf_offset + ci->len) { DEBUG_PRINTF("should only match at end of data\n"); + assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; } } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_ROOT_BOUNDS) { + PROGRAM_CASE(CHECK_BOUNDS) { if (!in_anchored && 
!roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) { DEBUG_PRINTF("failed root bounds check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_NOT_HANDLED) { + struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles; + if (fatbit_set(handled, t->handledKeyCount, ri->key)) { + DEBUG_PRINTF("key %u already set\n", ri->key); + assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; } @@ -1170,6 +1185,7 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, PROGRAM_CASE(CHECK_LOOKAROUND) { if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) { DEBUG_PRINTF("failed lookaround check\n"); + assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; } @@ -1180,6 +1196,7 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end, tctxt)) { DEBUG_PRINTF("failed lookaround check\n"); + assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; } @@ -1334,12 +1351,9 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, struct RoseContext *tctxt) { /* assert(!tctxt->in_anchored); */ /* assert(!tctxt->in_anch_playback); */ - const struct RoseRole *roleTable = getRoleTable(t); - const struct RosePred *predTable = getPredTable(t); - const struct RoseIterMapping *iterMapBase - = getByOffset(t, tl->iterMapOffset); + const u32 *iterProgram = getByOffset(t, tl->iterProgramOffset); const struct mmbit_sparse_iter *it = getByOffset(t, tl->iterOffset); - assert(ISALIGNED(iterMapBase)); + assert(ISALIGNED(iterProgram)); assert(ISALIGNED(it)); // Sparse iterator state was allocated earlier @@ -1356,50 +1370,19 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, fatbit_clear(handled_roles); for (; i != MMB_INVALID; - i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { - DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx); - const struct RoseIterMapping *iterMap = iterMapBase + idx; - const struct RoseIterRole *roles = getByOffset(t, iterMap->offset); - assert(ISALIGNED(roles)); - - DEBUG_PRINTF("%u roles to consider\n", iterMap->count); - for (u32 j = 0; j != iterMap->count; j++) { - u32 role = roles[j].role; - assert(role < t->roleCount); - DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred); - const struct RoseRole *tr = roleTable + role; - - if (fatbit_isset(handled_roles, t->roleCount, role)) { - DEBUG_PRINTF("role %u already handled by the walk, skip\n", - role); - continue; - } - - // Special case: if this role is a trivial case (pred type simple) - // we don't need to check any history and we already know the pred - // role is on. 
- if (tr->flags & ROSE_ROLE_PRED_SIMPLE) { - DEBUG_PRINTF("pred type is simple, no need for further" - " checks\n"); - } else { - assert(roles[j].pred < t->predCount); - const struct RosePred *tp = predTable + roles[j].pred; - if (!roseCheckPredHistory(tp, end)) { - continue; - } - } - - /* mark role as handled so we don't touch it again in this walk */ - fatbit_set(handled_roles, t->roleCount, role); - - if (!tr->programOffset) { - continue; - } - u64a som = 0ULL; - if (roseRunRoleProgram_i(t, tr->programOffset, end, &som, tctxt, 0, - &work_done) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } + i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { + u32 programOffset = iterProgram[idx]; + DEBUG_PRINTF("pred state %u (iter idx=%u) is on -> program %u\n", i, + idx, programOffset); + + // If this bit is switched on in the sparse iterator, it must be + // driving a program. + assert(programOffset); + + u64a som = 0ULL; + if (roseRunRoleProgram_i(t, programOffset, end, &som, tctxt, 0, + &work_done) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; } } diff --git a/src/rose/match.h b/src/rose/match.h index a39bebf37..6bcf781e7 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -269,30 +269,6 @@ void update_depth(struct RoseContext *tctxt, u8 depth) { tctxt->depth = d; } -static really_inline -int roseCheckHistoryAnch(const struct RosePred *tp, u64a end) { - DEBUG_PRINTF("end %llu min %u max %u\n", end, tp->minBound, tp->maxBound); - if (tp->maxBound == ROSE_BOUND_INF) { - return end >= tp->minBound; - } else { - return end >= tp->minBound && end <= tp->maxBound; - } -} - -// Check that a predecessor's history requirements are satisfied. -static really_inline -int roseCheckPredHistory(const struct RosePred *tp, u64a end) { - DEBUG_PRINTF("pred type %u\n", tp->historyCheck); - - if (tp->historyCheck == ROSE_ROLE_HISTORY_ANCH) { - return roseCheckHistoryAnch(tp, end); - } - - assert(tp->historyCheck == ROSE_ROLE_HISTORY_NONE || - tp->historyCheck == ROSE_ROLE_HISTORY_LAST_BYTE); - return 1; -} - /* Note: uses the stashed sparse iter state; cannot be called from * anybody else who is using it */ static rose_inline diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 978d413db..6b6e443fd 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -165,52 +165,53 @@ struct left_build_info { }; /** \brief Role instruction model used at compile time. */ -class RoleInstruction { +class RoseInstruction { public: - RoleInstruction() { + RoseInstruction() { memset(&u, 0, sizeof(u)); - u.end.code = ROSE_ROLE_INSTR_END; + u.end.code = ROSE_INSTR_END; } - explicit RoleInstruction(enum RoseRoleInstructionCode c) { + explicit RoseInstruction(enum RoseInstructionCode c) { memset(&u, 0, sizeof(u)); u.end.code = c; } - bool operator<(const RoleInstruction &a) const { + bool operator<(const RoseInstruction &a) const { return memcmp(&u, &a.u, sizeof(u)) < 0; } - bool operator==(const RoleInstruction &a) const { + bool operator==(const RoseInstruction &a) const { return memcmp(&u, &a.u, sizeof(u)) == 0; } - enum RoseRoleInstructionCode code() const { + enum RoseInstructionCode code() const { // Note that this sort of type-punning (relying on identical initial // layout) is explicitly allowed by the C++11 standard. 
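        // A minimal illustration of the rule (hypothetical types): every
        // union member begins with the same initial member sequence (a u8
        // code), so that member may be read through any of them:
        //
        //   struct A { u8 code; u32 x; };
        //   struct B { u8 code; u64 y; };
        //   union { A a; B b; } u;
        //   u.a.code = 5;
        //   u32 c = u.b.code; // well-defined: c == 5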
- return (enum RoseRoleInstructionCode)u.end.code; + return (enum RoseInstructionCode)u.end.code; } const void *get() const { switch (code()) { - case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; - case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: return &u.checkRootBounds; - case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; - case ROSE_ROLE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; - case ROSE_ROLE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; - case ROSE_ROLE_INSTR_SOM_ADJUST: return &u.somAdjust; - case ROSE_ROLE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; - case ROSE_ROLE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; - case ROSE_ROLE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; - case ROSE_ROLE_INSTR_REPORT: return &u.report; - case ROSE_ROLE_INSTR_REPORT_CHAIN: return &u.reportChain; - case ROSE_ROLE_INSTR_REPORT_EOD: return &u.reportEod; - case ROSE_ROLE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; - case ROSE_ROLE_INSTR_REPORT_SOM: return &u.reportSom; - case ROSE_ROLE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; - case ROSE_ROLE_INSTR_SET_STATE: return &u.setState; - case ROSE_ROLE_INSTR_SET_GROUPS: return &u.setGroups; - case ROSE_ROLE_INSTR_END: return &u.end; + case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; + case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; + case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; + case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; + case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; + case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; + case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; + case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; + case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; + case ROSE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; + case ROSE_INSTR_REPORT: return &u.report; + case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain; + case ROSE_INSTR_REPORT_EOD: return &u.reportEod; + case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; + case ROSE_INSTR_REPORT_SOM: return &u.reportSom; + case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; + case ROSE_INSTR_SET_STATE: return &u.setState; + case ROSE_INSTR_SET_GROUPS: return &u.setGroups; + case ROSE_INSTR_END: return &u.end; } assert(0); return &u.end; @@ -218,85 +219,84 @@ class RoleInstruction { size_t length() const { switch (code()) { - case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); - case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: return sizeof(u.checkRootBounds); - case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); - case ROSE_ROLE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); - case ROSE_ROLE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); - case ROSE_ROLE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); - case ROSE_ROLE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); - case ROSE_ROLE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); - case ROSE_ROLE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); - case ROSE_ROLE_INSTR_REPORT: return sizeof(u.report); - case ROSE_ROLE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); - case ROSE_ROLE_INSTR_REPORT_EOD: return sizeof(u.reportEod); - case ROSE_ROLE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); - case ROSE_ROLE_INSTR_REPORT_SOM: return sizeof(u.reportSom); - case ROSE_ROLE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); - case ROSE_ROLE_INSTR_SET_STATE: return sizeof(u.setState); - case ROSE_ROLE_INSTR_SET_GROUPS: return sizeof(u.setGroups); - case ROSE_ROLE_INSTR_END: return 
sizeof(u.end); + case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); + case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); + case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); + case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); + case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); + case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); + case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); + case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); + case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); + case ROSE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); + case ROSE_INSTR_REPORT: return sizeof(u.report); + case ROSE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); + case ROSE_INSTR_REPORT_EOD: return sizeof(u.reportEod); + case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); + case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); + case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); + case ROSE_INSTR_SET_STATE: return sizeof(u.setState); + case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); + case ROSE_INSTR_END: return sizeof(u.end); } return 0; } union { - ROSE_ROLE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; - ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS checkRootBounds; - ROSE_ROLE_STRUCT_CHECK_LOOKAROUND checkLookaround; - ROSE_ROLE_STRUCT_CHECK_LEFTFIX checkLeftfix; - ROSE_ROLE_STRUCT_ANCHORED_DELAY anchoredDelay; - ROSE_ROLE_STRUCT_SOM_ADJUST somAdjust; - ROSE_ROLE_STRUCT_SOM_LEFTFIX somLeftfix; - ROSE_ROLE_STRUCT_TRIGGER_INFIX triggerInfix; - ROSE_ROLE_STRUCT_TRIGGER_SUFFIX triggerSuffix; - ROSE_ROLE_STRUCT_REPORT report; - ROSE_ROLE_STRUCT_REPORT_CHAIN reportChain; - ROSE_ROLE_STRUCT_REPORT_EOD reportEod; - ROSE_ROLE_STRUCT_REPORT_SOM_INT reportSomInt; - ROSE_ROLE_STRUCT_REPORT_SOM reportSom; - ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; - ROSE_ROLE_STRUCT_SET_STATE setState; - ROSE_ROLE_STRUCT_SET_GROUPS setGroups; - ROSE_ROLE_STRUCT_END end; + ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; + ROSE_STRUCT_CHECK_BOUNDS checkBounds; + ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; + ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; + ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix; + ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; + ROSE_STRUCT_SOM_ADJUST somAdjust; + ROSE_STRUCT_SOM_LEFTFIX somLeftfix; + ROSE_STRUCT_TRIGGER_INFIX triggerInfix; + ROSE_STRUCT_TRIGGER_SUFFIX triggerSuffix; + ROSE_STRUCT_REPORT report; + ROSE_STRUCT_REPORT_CHAIN reportChain; + ROSE_STRUCT_REPORT_EOD reportEod; + ROSE_STRUCT_REPORT_SOM_INT reportSomInt; + ROSE_STRUCT_REPORT_SOM reportSom; + ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; + ROSE_STRUCT_SET_STATE setState; + ROSE_STRUCT_SET_GROUPS setGroups; + ROSE_STRUCT_END end; } u; }; struct build_context : boost::noncopyable { - /** \brief Rose Role information. - * These entries are filled in by a number of functions as other tables are - * created. - */ - vector roleTable; - - /** \brief Role program mapping, keyed by index in roleTable. */ - vector> rolePrograms; - /** \brief minimum depth in number of hops from root/anchored root. */ map depths; /** \brief information about engines to the left of a vertex */ map leftfix_info; + /** \brief mapping from suffix to queue index. */ + map suffixes; + + /** \brief Mapping from vertex to key, for vertices with a + * CHECK_NOT_HANDLED instruction. */ + ue2::unordered_map handledKeys; + /** \brief Number of roles with a state bit. 
- * This set by buildInitialRoleTable() and should be constant throughout + * + * This is set by assignStateIndices() and should be constant throughout * the rest of the compile. */ size_t numStates = 0; - // Very simple cache from sparse iter to offset, used when building up - // iterators in early misc. + /** \brief Very simple cache from sparse iter to offset, used when building + * up iterators in early misc. */ map, u32> iterCache; - /** \brief maps RoseRole index to a list of RosePred indices */ - map > rolePredecessors; + /** \brief LookEntry list cache, so that we don't have to go scanning + * through the full list to find cases we've used already. */ + ue2::unordered_map, size_t> lookaround_cache; /** \brief Lookaround table for Rose roles. */ vector lookaround; - /** \brief Map from literal final ID to a set of non-root role IDs. */ - ue2::unordered_map> litNonRootRoles; - /** \brief State indices, for those roles that have them. */ ue2::unordered_map roleStateIndices; @@ -401,18 +401,6 @@ const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { return n; } -/* vertex ordered by their role index */ -static -vector get_ordered_verts(const RoseGraph &g) { - vector verts; - insert(&verts, verts.end(), vertices_range(g)); - sort(verts.begin(), verts.end(), - [&g](const RoseVertex &a, const RoseVertex &b) { - return g[a].role < g[b].role; - }); - return verts; -} - static u32 countRosePrefixes(const vector &roses) { u32 num = 0; @@ -1255,9 +1243,8 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, } static -void findSuffixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, - map *suffixes) { - const RoseGraph &g = tbi.g; +void assignSuffixQueues(RoseBuildImpl &build, build_context &bc) { + const RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (!g[v].suffix) { @@ -1269,13 +1256,13 @@ void findSuffixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].idx, s.graph()); // We may have already built this NFA. - if (contains(*suffixes, s)) { + if (contains(bc.suffixes, s)) { continue; } - u32 queue = qif.get_queue(); + u32 queue = build.qif.get_queue(); DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue); - suffixes->insert(make_pair(s, queue)); + bc.suffixes.emplace(s, queue); } } @@ -1300,7 +1287,6 @@ void setSuffixProperties(NFA &n, const suffix_id &suff, static bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, - map *suffixes, set *no_retrigger_queues) { map > suffixTriggers; findSuffixTriggers(tbi, &suffixTriggers); @@ -1309,7 +1295,7 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, // (unique) queue indices, so that we call add_nfa_to_blob in the same // order. 
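    // E.g. suffixes assigned queues {5, 2, 9} are always written out in
    // queue order 2, 5, 9, so the blob layout does not depend on the
    // iteration order of bc.suffixes. (Illustrative queue numbers only.)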
vector> ordered; - for (const auto &e : *suffixes) { + for (const auto &e : bc.suffixes) { ordered.emplace_back(e.second, e.first); } sort(begin(ordered), end(ordered)); @@ -1404,11 +1390,10 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { static bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, - map *suffixes, set *no_retrigger_queues, u32 *leftfixBeginQueue) { - findSuffixes(tbi, qif, suffixes); + assignSuffixQueues(tbi, bc); - if (!buildSuffixes(tbi, bc, suffixes, no_retrigger_queues)) { + if (!buildSuffixes(tbi, bc, no_retrigger_queues)) { return false; } @@ -2242,24 +2227,6 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { return addIteratorToTable(bc, iter); } -#ifdef DEBUG -static -const char *describeHistory(RoseRoleHistory history) { - switch (history) { - case ROSE_ROLE_HISTORY_NONE: - return "NONE"; - case ROSE_ROLE_HISTORY_ANCH: - return "ANCH (previous role at fixed offset)"; - case ROSE_ROLE_HISTORY_LAST_BYTE: - return "LAST_BYTE (previous role matches only at EOD)"; - case ROSE_ROLE_HISTORY_INVALID: - return "INVALID"; - } - assert(0); - return "UNKNOWN"; -} -#endif - static void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &grey) { // Global limit. @@ -2308,13 +2275,12 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) { static void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, const QueueIndexFactory &qif, - const map &suffixes, vector *out) { out->resize(qif.allocated_count()); map > qi_to_ekeys; /* for determinism */ - for (const auto &e : suffixes) { + for (const auto &e : bc.suffixes) { const suffix_id &s = e.first; u32 qi = e.second; set ekeys = reportsToEkeys(all_reports(s), tbi.rm); @@ -2396,7 +2362,6 @@ bool hasInternalReport(const set &reports, const ReportManager &rm) { static void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, const vector &outfixes, - const map &suffixes, const vector &ekeyListOffsets, const set &no_retrigger_queues, NfaInfo *infos) { @@ -2422,7 +2387,7 @@ void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, } // Mark suffixes that only trigger external reports. - for (const auto &e : suffixes) { + for (const auto &e : bc.suffixes) { const suffix_id &s = e.first; u32 qi = e.second; @@ -2437,7 +2402,7 @@ void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, if (!g[v].suffix) { continue; } - u32 qi = suffixes.at(g[v].suffix); + u32 qi = bc.suffixes.at(g[v].suffix); if (build.isInETable(v)) { infos[qi].eod = 1; } @@ -2622,46 +2587,61 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { * fail_jump/done_jump targets set correctly. */ static -vector -flattenRoleProgram(const vector> &program) { - vector out; +vector +flattenRoleProgram(const vector> &programs) { + vector out; vector offsets; // offset of each instruction (bytes) vector targets; // jump target for each instruction + DEBUG_PRINTF("%zu programs\n", programs.size()); + size_t curr_offset = 0; - for (const auto &prog : program) { - for (const auto &ri : prog) { + for (const auto &program : programs) { + DEBUG_PRINTF("program with %zu instructions\n", program.size()); + for (const auto &ri : program) { out.push_back(ri); offsets.push_back(curr_offset); curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } - for (size_t i = 0; i < prog.size(); i++) { + for (size_t i = 0; i < program.size(); i++) { targets.push_back(curr_offset); } } // Add an END instruction. 
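    // The END instruction also provides the final jump target: every
    // instruction in block k is assigned the first offset of block k+1.
    // Worked example (hypothetical slot sizes): with block 0 holding
    // CHECK_BOUNDS @0 and SET_STATE @16, and block 1 holding REPORT @24,
    // block-0 instructions get target 24, block-1 instructions get target
    // 32, END lands @32, and CHECK_BOUNDS.fail_jump becomes 24 - 0 = 24.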
- out.emplace_back(ROSE_ROLE_INSTR_END); + out.emplace_back(ROSE_INSTR_END); offsets.push_back(curr_offset); targets.push_back(curr_offset); + assert(targets.size() == out.size()); + assert(offsets.size() == out.size()); + for (size_t i = 0; i < out.size(); i++) { auto &ri = out[i]; switch (ri.code()) { - case ROSE_ROLE_INSTR_ANCHORED_DELAY: + case ROSE_INSTR_ANCHORED_DELAY: + assert(targets[i] > offsets[i]); // jumps always progress ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; break; - case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: + case ROSE_INSTR_CHECK_ONLY_EOD: + assert(targets[i] > offsets[i]); ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; break; - case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: - ri.u.checkRootBounds.fail_jump = targets[i] - offsets[i]; + case ROSE_INSTR_CHECK_BOUNDS: + assert(targets[i] > offsets[i]); + ri.u.checkBounds.fail_jump = targets[i] - offsets[i]; + break; + case ROSE_INSTR_CHECK_NOT_HANDLED: + assert(targets[i] > offsets[i]); + ri.u.checkNotHandled.fail_jump = targets[i] - offsets[i]; break; - case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: + case ROSE_INSTR_CHECK_LOOKAROUND: + assert(targets[i] > offsets[i]); ri.u.checkLookaround.fail_jump = targets[i] - offsets[i]; break; - case ROSE_ROLE_INSTR_CHECK_LEFTFIX: + case ROSE_INSTR_CHECK_LEFTFIX: + assert(targets[i] > offsets[i]); ri.u.checkLeftfix.fail_jump = targets[i] - offsets[i]; break; default: @@ -2673,8 +2653,9 @@ flattenRoleProgram(const vector> &program) { } static -u32 writeRoleProgram(build_context &bc, vector &program) { +u32 writeRoleProgram(build_context &bc, vector &program) { DEBUG_PRINTF("writing %zu instructions\n", program.size()); + u32 programOffset = 0; for (const auto &ri : program) { u32 offset = @@ -2685,52 +2666,10 @@ u32 writeRoleProgram(build_context &bc, vector &program) { programOffset = offset; } } + DEBUG_PRINTF("program begins at offset %u\n", programOffset); return programOffset; } -static -void buildRootRolePrograms(const RoseBuildImpl &build, build_context &bc, - vector &literalTable) { - for (u32 id = 0; id < literalTable.size(); id++) { - DEBUG_PRINTF("lit %u\n", id); - const auto &lit_info = **getLiteralInfoByFinalId(build, id).begin(); - - flat_set root_roles; // with programs to run. - - for (RoseVertex v : lit_info.vertices) { - if (!build.isRootSuccessor(v)) { - continue; - } - if (build.hasDirectFinalId(v)) { - DEBUG_PRINTF("[skip root role %u as direct]\n", - build.g[v].role); - continue; - } - DEBUG_PRINTF("root role %u\n", build.g[v].role); - root_roles.insert(build.g[v].role); - } - - vector> root_prog; - for (const auto &role : root_roles) { - assert(role < bc.rolePrograms.size()); - const auto &role_prog = bc.rolePrograms[role]; - if (role_prog.empty()) { - continue; - } - root_prog.push_back(role_prog); - } - - RoseLiteral &tl = literalTable[id]; - if (root_prog.empty()) { - tl.rootProgramOffset = 0; - continue; - } - - auto final_program = flattenRoleProgram(root_prog); - tl.rootProgramOffset = writeRoleProgram(bc, final_program); - } -} - static void buildActiveLeftIter(const vector &leftTable, vector &out) { @@ -2780,27 +2719,6 @@ bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc, return false; } -static -void fetchEodAnchors(map > &eods, - const RoseGraph &g) { - for (auto v : vertices_range(g)) { - if (!g[v].eod_accept) { - continue; - } - - DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].idx, - in_degree(v, g)); - - assert(!g[v].reports.empty()); - for (const auto r : g[v].reports) { - // In-edges go into eod list. 
- for (const auto &e : in_edges_range(v, g)) { - eods[r].push_back(e); - } - } - } -} - /* creates (and adds to rose) a sparse iterator visiting pred states/roles, * returns a pair: * - the offset of the itermap @@ -2808,10 +2726,12 @@ void fetchEodAnchors(map > &eods, */ static pair addPredSparseIter(build_context &bc, - const map > &predStates) { + const map &predPrograms) { vector keys; - for (u32 k : predStates | map_keys) { - keys.push_back(k); + vector programTable; + for (const auto &elem : predPrograms) { + keys.push_back(elem.first); + programTable.push_back(elem.second); } vector iter; @@ -2819,121 +2739,10 @@ pair addPredSparseIter(build_context &bc, assert(!iter.empty()); DEBUG_PRINTF("iter size = %zu\n", iter.size()); - // Build mapping tables and add to iter table u32 iterOffset = addIteratorToTable(bc, iter); - - vector itermap; - for (const auto &p : predStates) { - u32 iterRoleOffset = add_to_engine_blob(bc, p.second.begin(), - p.second.end()); - itermap.push_back(RoseIterMapping()); - itermap.back().offset = iterRoleOffset; - itermap.back().count = verify_u32(p.second.size()); - } - u32 iterMapOffset = add_to_engine_blob(bc, itermap.begin(), itermap.end()); - - return make_pair(iterMapOffset, iterOffset); -} - -static -void createPred(const RoseBuildImpl &tbi, build_context &bc, - const RoseEdge &e, vector &predTable) { - const RoseGraph &g = tbi.g; - - DEBUG_PRINTF("building pred %zu of type %s\n", predTable.size(), - describeHistory(g[e].history)); - RoseVertex u = source(e, g); - RoseVertex v = target(e, g); - - u32 lit_length = 0; - if (!g[v].eod_accept) { - // Use the minimum literal length. - lit_length = verify_u32(tbi.minLiteralLen(v)); - } - - bc.rolePredecessors[g[v].role].push_back(verify_u32(predTable.size())); - - predTable.push_back(RosePred()); - RosePred &tp = predTable.back(); - memset(&tp, 0, sizeof(tp)); - tp.role = g[u].role; - tp.minBound = g[e].minBound + lit_length; - tp.maxBound = g[e].maxBound == ROSE_BOUND_INF ? ROSE_BOUND_INF - : g[e].maxBound + lit_length; - - // Find the history scheme appropriate to this edge. Note that these may be - // updated later, as the history collected by the predecessor role is - // dependent on all its out edges. - tp.historyCheck = g[e].history; - if (tp.historyCheck == ROSE_ROLE_HISTORY_ANCH) { - assert(g[u].max_offset != ROSE_BOUND_INF); - /* pred role does not need to know about history scheme */ - DEBUG_PRINTF("absing (%u,%u + %u) u%u/%zu v%u/%zu\n", tp.minBound, - tp.maxBound, g[u].max_offset, g[u].role, g[u].idx, - g[v].role, g[v].idx); - tp.minBound += g[u].max_offset; /* make absolute */ - if (tp.maxBound != ROSE_BOUND_INF) { - tp.maxBound += g[u].max_offset; /* make absolute */ - } - } - - if (tp.historyCheck == ROSE_ROLE_HISTORY_NONE) { - tp.minBound = 0; - } - - DEBUG_PRINTF("built pred %zu of %u %u %hhu:%s\n", predTable.size() - 1, - tp.minBound, tp.maxBound, tp.historyCheck, - describeHistory((RoseRoleHistory)tp.historyCheck)); -} - -/* returns a pair containing the iter map offset and iter offset */ -static -pair buildEodAnchorRoles(RoseBuildImpl &tbi, build_context &bc, - vector &predTable) { - const RoseGraph &g = tbi.g; - map > eods; - fetchEodAnchors(eods, g); - - if (eods.empty()) { - DEBUG_PRINTF("no EOD anchors\n"); - return {0, 0}; - } - - // pred state id -> role/pred entries - map > predStates; - - for (const auto &er : eods) { - // Create a role to fire this particular report. 
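/*
 * [Editorial sketch, not part of the patch.] The shape of the data that the
 * reworked addPredSparseIter() above produces: the sorted pred state ids
 * feed mmbBuildSparseIterator(), and the dense index the iterator yields at
 * run time selects the matching entry of a parallel program-offset table.
 */
#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

struct PredIterData {
    std::vector<uint32_t> keys;         // pred state ids, ascending
    std::vector<uint32_t> programTable; // program offset per dense index
};

static PredIterData
splitPredPrograms(const std::map<uint32_t, uint32_t> &predPrograms) {
    PredIterData out;
    for (const auto &elem : predPrograms) { // std::map iterates in key order
        out.keys.push_back(elem.first);
        out.programTable.push_back(elem.second);
    }
    return out;
}

// At run time, a set pred state reported with dense index idx runs:
static uint32_t programFor(const PredIterData &d, size_t idx) {
    return d.programTable.at(idx);
}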
- DEBUG_PRINTF("creating EOD accept role %zu for report %u\n", - bc.roleTable.size(), er.first); - bc.roleTable.push_back(RoseRole()); - RoseRole &tr = bc.roleTable.back(); - memset(&tr, 0, sizeof(tr)); - - bc.rolePrograms.push_back({}); - auto &program = bc.rolePrograms.back(); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_REPORT_EOD); - ri.u.report.report = er.first; - program.push_back(ri); - - // Collect the state IDs of this report's vertices to add to the EOD - // sparse iterator, creating pred entries appropriately. - for (const auto &e : er.second) { - RoseVertex v = source(e, g); - DEBUG_PRINTF("vertex %zu has role %u\n", g[v].idx, g[v].role); - assert(contains(bc.roleStateIndices, v)); - u32 predStateIdx = bc.roleStateIndices.at(v); - - createPred(tbi, bc, e, predTable); - RoseIterRole ir = { - (u32)(bc.roleTable.size() - 1), - (u32)(predTable.size() - 1) - }; - predStates[predStateIdx].push_back(ir); - } - } - - return addPredSparseIter(bc, predStates); + u32 programTableOffset = + add_to_engine_blob(bc, begin(programTable), end(programTable)); + return make_pair(programTableOffset, iterOffset); } static @@ -3087,8 +2896,7 @@ bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { static void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program, - ue2::unordered_map, size_t> &lookaround_cache) { + vector &program) { if (!build.cc.grey.roseLookaroundMasks) { return; } @@ -3113,20 +2921,20 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, DEBUG_PRINTF("role has lookaround\n"); u32 look_idx; - auto it = lookaround_cache.find(look); - if (it != lookaround_cache.end()) { + auto it = bc.lookaround_cache.find(look); + if (it != bc.lookaround_cache.end()) { DEBUG_PRINTF("reusing look at idx %zu\n", it->second); look_idx = verify_u32(it->second); } else { size_t idx = bc.lookaround.size(); - lookaround_cache.emplace(look, idx); + bc.lookaround_cache.emplace(look, idx); insert(&bc.lookaround, bc.lookaround.end(), look); DEBUG_PRINTF("adding look at idx %zu\n", idx); look_idx = verify_u32(idx); } u32 look_count = verify_u32(look.size()); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_LOOKAROUND); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND); ri.u.checkLookaround.index = look_idx; ri.u.checkLookaround.count = look_count; program.push_back(ri); @@ -3134,7 +2942,7 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + vector &program) { auto it = bc.leftfix_info.find(v); if (it == end(bc.leftfix_info)) { return; @@ -3147,7 +2955,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!build.cc.streaming || build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_LEFTFIX); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX); ri.u.checkLeftfix.queue = lni.queue; ri.u.checkLeftfix.lag = build.g[v].left.lag; ri.u.checkLeftfix.report = build.g[v].left.leftfix_report; @@ -3156,7 +2964,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, - RoseVertex v, vector &program) { + RoseVertex v, vector &program) { // Only relevant for roles that can be triggered by the anchored table. 
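/*
 * [Editorial sketch, not part of the patch.] The cache-or-append idiom used
 * by makeRoleLookaround() above: identical lookaround sequences are stored
 * once in the flat table, and each role refers to its sequence by an
 * (index, count) pair. The generic helper below models bc.lookaround and
 * bc.lookaround_cache.
 */
#include <cstddef>
#include <map>
#include <vector>

template <typename T>
static size_t internSequence(std::vector<T> &table,
                             std::map<std::vector<T>, size_t> &cache,
                             const std::vector<T> &seq) {
    auto it = cache.find(seq);
    if (it != cache.end()) {
        return it->second; // reuse the copy already in the table
    }
    size_t idx = table.size();
    cache.emplace(seq, idx);
    table.insert(table.end(), seq.begin(), seq.end());
    return idx; // new sequence begins at this index
}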
if (!build.isAnchored(v)) { return; @@ -3165,7 +2973,7 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, // TODO: also limit to matches that can occur after // floatingMinLiteralMatchOffset. - auto ri = RoleInstruction(ROSE_ROLE_INSTR_ANCHORED_DELAY); + auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY); ri.u.anchoredDelay.depth = (u8)min(254U, bc.depths.at(v)); ri.u.anchoredDelay.groups = build.g[v].groups; program.push_back(ri); @@ -3173,7 +2981,7 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, static void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + vector &program) { const auto &g = build.g; /* we are a suffaig - need to update role to provide som to the @@ -3182,13 +2990,13 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, if (g[v].left.tracksSom()) { assert(contains(bc.leftfix_info, v)); const left_build_info &lni = bc.leftfix_info.at(v); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_SOM_LEFTFIX); + auto ri = RoseInstruction(ROSE_INSTR_SOM_LEFTFIX); ri.u.somLeftfix.queue = lni.queue; ri.u.somLeftfix.lag = g[v].left.lag; program.push_back(ri); has_som = true; } else if (g[v].som_adjust) { - auto ri = RoleInstruction(ROSE_ROLE_INSTR_SOM_ADJUST); + auto ri = RoseInstruction(ROSE_INSTR_SOM_ADJUST); ri.u.somAdjust.distance = g[v].som_adjust; program.push_back(ri); has_som = true; @@ -3199,19 +3007,17 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(id < build.rm.numReports()); const Report &ir = build.rm.getReport(id); if (isInternalSomReport(ir)) { - auto ri = - RoleInstruction(has_som ? ROSE_ROLE_INSTR_REPORT_SOM - : ROSE_ROLE_INSTR_REPORT_SOM_INT); + auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM + : ROSE_INSTR_REPORT_SOM_INT); ri.u.report.report = id; program.push_back(ri); } else if (ir.type == INTERNAL_ROSE_CHAIN) { - auto ri = RoleInstruction(ROSE_ROLE_INSTR_REPORT_CHAIN); + auto ri = RoseInstruction(ROSE_INSTR_REPORT_CHAIN); ri.u.report.report = id; program.push_back(ri); } else { - auto ri = - RoleInstruction(has_som ? ROSE_ROLE_INSTR_REPORT_SOM_KNOWN - : ROSE_ROLE_INSTR_REPORT); + auto ri = RoseInstruction(has_som ? 
ROSE_INSTR_REPORT_SOM_KNOWN + : ROSE_INSTR_REPORT); ri.u.report.report = id; program.push_back(ri); } @@ -3220,14 +3026,13 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - const map &suffixes, - vector &program) { + vector &program) { const auto &g = build.g; if (!g[v].suffix) { return; } - assert(contains(suffixes, g[v].suffix)); - u32 qi = suffixes.at(g[v].suffix); + assert(contains(bc.suffixes, g[v].suffix)); + u32 qi = bc.suffixes.at(g[v].suffix); assert(contains(bc.engineOffsets, qi)); const NFA *nfa = get_nfa_from_blob(bc, qi); u32 suffixEvent; @@ -3242,7 +3047,7 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); suffixEvent = MQE_TOP; } - auto ri = RoleInstruction(ROSE_ROLE_INSTR_TRIGGER_SUFFIX); + auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_SUFFIX); ri.u.triggerSuffix.queue = qi; ri.u.triggerSuffix.event = suffixEvent; program.push_back(ri); @@ -3250,21 +3055,21 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleGroups(const rose_group &groups, - vector &program) { + vector &program) { if (!groups) { return; } - auto ri = RoleInstruction(ROSE_ROLE_INSTR_SET_GROUPS); + auto ri = RoseInstruction(ROSE_INSTR_SET_GROUPS); ri.u.setGroups.groups = groups; program.push_back(ri); } static void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, - RoseVertex u, vector &program) { + RoseVertex u, vector &program) { const auto &g = build.g; - vector infix_program; + vector infix_program; for (const auto &e : out_edges_range(u, g)) { RoseVertex v = target(e, g); @@ -3290,7 +3095,7 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, assert(top < MQE_INVALID); } - auto ri = RoleInstruction(ROSE_ROLE_INSTR_TRIGGER_INFIX); + auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_INFIX); ri.u.triggerInfix.queue = lbi.queue; ri.u.triggerInfix.event = top; ri.u.triggerInfix.cancel = g[e].rose_cancel_prev_top; @@ -3312,165 +3117,28 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, } static -void makeRoleSetState(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program, - u32 *nextStateIndex) { - const auto &g = build.g; - - // Leaf nodes don't need state indices, as they don't have successors. - if (isLeafNode(v, g)) { +void makeRoleSetState(const build_context &bc, RoseVertex v, + vector &program) { + // We only need this instruction if a state index has been assigned to this + // vertex. 
+ auto it = bc.roleStateIndices.find(v); + if (it == end(bc.roleStateIndices)) { return; } - /* TODO: also don't need a state index if all edges are nfa based */ - - u32 idx = (*nextStateIndex)++; - auto ri = RoleInstruction(ROSE_ROLE_INSTR_SET_STATE); + u32 idx = it->second; + auto ri = RoseInstruction(ROSE_INSTR_SET_STATE); ri.u.setState.index = idx; ri.u.setState.depth = (u8)min(254U, bc.depths.at(v)); program.push_back(ri); - bc.roleStateIndices.emplace(v, idx); -} - -static -void createRoleEntry(RoseBuildImpl &tbi, build_context &bc, - RoseVertex v, vector &roleTable, - ue2::unordered_map, size_t> &lookaround_cache, - const map &suffixes, u32 *nextStateIndex) { - RoseGraph &g = tbi.g; - - // set role ID in the graph where we can find it later - u32 roleId = verify_u32(roleTable.size()); - g[v].role = roleId; - // track id if it's a nonroot role for use in buildSparseIter - if (!tbi.isRootSuccessor(v)) { - for (const auto &lit_id : g[v].literals) { - u32 final_id = tbi.literal_info.at(lit_id).final_id; - bc.litNonRootRoles[final_id].insert(roleId); - } - } - - roleTable.push_back(RoseRole()); - RoseRole &tr = roleTable.back(); - memset(&tr, 0, sizeof(tr)); - - DEBUG_PRINTF("creating role %u for i%zu, eod %u, s (%p,%p)\n", roleId, - g[v].idx, (u32)g[v].eod_accept, g[v].suffix.graph.get(), - g[v].suffix.haig.get()); - - // Build role program. - - assert(bc.rolePrograms.size() == roleId); - bc.rolePrograms.push_back({}); - vector &program = bc.rolePrograms.back(); - - // First, add program instructions that enforce preconditions without - // effects. - - makeRoleAnchoredDelay(tbi, bc, v, program); - - if (onlyAtEod(tbi, v)) { - DEBUG_PRINTF("only at eod\n"); - program.push_back(RoleInstruction(ROSE_ROLE_INSTR_CHECK_ONLY_EOD)); - } - - makeRoleLookaround(tbi, bc, v, program, lookaround_cache); - makeRoleCheckLeftfix(tbi, bc, v, program); - - // Next, we can add program instructions that have effects. - - makeRoleReports(tbi, bc, v, program); - makeRoleInfixTriggers(tbi, bc, v, program); - makeRoleSuffix(tbi, bc, v, suffixes, program); - makeRoleSetState(tbi, bc, v, program, nextStateIndex); - makeRoleGroups(g[v].groups, program); -} - -static -void writeRolePrograms(build_context &bc) { - assert(bc.roleTable.size() == bc.rolePrograms.size()); - - for (size_t i = 0; i < bc.roleTable.size(); i++) { - auto &role = bc.roleTable[i]; - auto &program = bc.rolePrograms[i]; - - if (program.empty()) { - role.programOffset = 0; - continue; - } - - // Safety check: all precondition checks should occur before - // instructions with effects. - assert(is_partitioned( - begin(program), end(program), [](const RoleInstruction &ri) { - // CHECK_LEFTFIX is the last precondition check. - return ri.code() <= ROSE_ROLE_INSTR_CHECK_LEFTFIX; - })); - - // Apply jump fixups. - auto final_program = flattenRoleProgram({program}); - - // Write into bytecode. - role.programOffset = writeRoleProgram(bc, final_program); - } -} - -// Construct an initial role table containing the basic role information. -static -void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc, - const map &suffixes) { - DEBUG_PRINTF("building role table\n"); - - const RoseGraph &g = tbi.g; - vector &roleTable = bc.roleTable; - - // Create a list of vertices, ordered by depth. 
- vector verts; - insert(&verts, verts.end(), vertices(g)); - sort(begin(verts), end(verts), [&bc, &g](const RoseVertex &a, - const RoseVertex &b) { - return tie(bc.depths.at(a), g[a].idx) < tie(bc.depths.at(b), g[b].idx); - }); - - // LookEntry list cache, so that we don't have to go scanning through the - // full list to find cases we've used already. - ue2::unordered_map, size_t> lookaround_cache; - - // Write a role entry for every vertex that represents a real literal. - // Direct reports are skipped. - // We start the state indices from one after the last one used (on the - // anchored root, if it exists). - u32 stateIndex = verify_u32(roleTable.size()); - - for (RoseVertex v : verts) { - if (tbi.isVirtualVertex(v)) { - DEBUG_PRINTF("vertex idx=%zu is virtual\n", g[v].idx); - continue; - } - if (tbi.hasDirectFinalId(v)) { - DEBUG_PRINTF("vertex idx=%zu is direct report\n", g[v].idx); - continue; - } - - assert(!g[v].literals.empty()); - createRoleEntry(tbi, bc, v, roleTable, lookaround_cache, suffixes, - &stateIndex); - } - - bc.numStates = stateIndex; - DEBUG_PRINTF("wrote %zu roles with %u states\n", roleTable.size(), - stateIndex); } static -void makeRoleCheckRootBounds(const RoseBuildImpl &build, RoseVertex v, - const RoseEdge &e, - vector &program) { +void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, + const RoseEdge &e, vector &program) { const RoseGraph &g = build.g; const RoseVertex u = source(e, g); - assert(u == build.root || u == build.anchored_root); - // Use the minimum literal length. u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); @@ -3491,95 +3159,141 @@ void makeRoleCheckRootBounds(const RoseBuildImpl &build, RoseVertex v, assert(max_bound <= ROSE_BOUND_INF); assert(min_bound <= max_bound); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS); - ri.u.checkRootBounds.min_bound = min_bound; - ri.u.checkRootBounds.max_bound = max_bound; + auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS); + ri.u.checkBounds.min_bound = min_bound; + ri.u.checkBounds.max_bound = max_bound; // This precondition instruction should go near the start of // the program, after the ONLY_EOD check if it's present. auto it = - find_if(begin(program), end(program), [](const RoleInstruction &ri) { - return ri.code() > ROSE_ROLE_INSTR_CHECK_ONLY_EOD; + find_if(begin(program), end(program), [](const RoseInstruction &ri) { + return ri.code() > ROSE_INSTR_CHECK_ONLY_EOD; }); program.insert(it, ri); } -// Construct pred table and sparse iterators over preds. static -void buildPredTable(const RoseBuildImpl &tbi, build_context &bc, - vector &predTable) { - const RoseGraph &g = tbi.g; +vector makeRoleProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e) { + const RoseGraph &g = build.g; + auto v = target(e, g); - // We write our preds out in role index order just to give things some - // repeatability. - vector verts = get_ordered_verts(g); + vector program; - for (RoseVertex v : verts) { - if (tbi.isAnyStart(v) || g[v].role == MO_INVALID_IDX) { - continue; - } + // First, add program instructions that enforce preconditions without + // effects. - assert(g[v].role < bc.roleTable.size()); - RoseRole &tr = bc.roleTable.at(g[v].role); + makeRoleAnchoredDelay(build, bc, v, program); - // Assumption: if a vertex is a root role, it must have only one - // predecessor. 
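/*
 * [Editorial sketch, not part of the patch.] The run-time predicate that a
 * CHECK_BOUNDS instruction built by makeRoleCheckBounds() implies, under
 * the assumption that `end` is the current match end offset; when the check
 * fails, the interpreter advances by fail_jump. ROSE_BOUND_INF marks an
 * unbounded maximum, so only the minimum is tested in that case.
 */
#include <cstdint>

static const uint32_t BOUND_INF_SKETCH = ~0U; // stand-in for ROSE_BOUND_INF

static bool boundsHold(uint64_t end, uint32_t min_bound, uint32_t max_bound) {
    if (end < min_bound) {
        return false; // fail_jump taken
    }
    if (max_bound != BOUND_INF_SKETCH && end > max_bound) {
        return false; // fail_jump taken
    }
    return true; // fall through to the rest of the program
}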
- assert(!tbi.isRootSuccessor(v) || in_degree(v, g) == 1); + if (onlyAtEod(build, v)) { + DEBUG_PRINTF("only at eod\n"); + program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD)); + } - // Check if we can use a "simple" check, i.e. one pred, bounds [0, - // inf], no overlap and not anchor->float transition. - if (in_degree(v, g) == 1) { - const RoseEdge &e = *in_edges(v, g).first; - RoseVertex u = source(e, g); - DEBUG_PRINTF("single edge: (role=%u)->(role=%u) with bounds " - "[%u, %u]\n", g[u].role, g[v].role, g[e].minBound, - g[e].maxBound); - if (tbi.isAnyStart(u)) { - // Solely root roles can be handled with no check at all (for - // very simple cases), or a bounds check in the role program. - assert(u != tbi.root || g[e].maxBound == ROSE_BOUND_INF); - if (u == tbi.root && g[e].minBound == 0) { - DEBUG_PRINTF("root role with .* edge, no pred needed\n"); - continue; /* no pred required */ - } + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + makeRoleCheckBounds(build, v, e, program); + } - tr.flags &= ROSE_ROLE_PRED_CLEAR_MASK; - auto &program = bc.rolePrograms[g[v].role]; - makeRoleCheckRootBounds(tbi, v, e, program); - continue; - } + makeRoleLookaround(build, bc, v, program); + makeRoleCheckLeftfix(build, bc, v, program); - assert(!g[u].literals.empty() && !g[v].literals.empty()); - bool pseudo_delay_history = true; - for (u32 ul : g[u].literals) { - pseudo_delay_history = !!tbi.literals.right.at(ul).delay; - } - if (!pseudo_delay_history) { - DEBUG_PRINTF("max_overlap = %zu\n", - tbi.maxLiteralOverlap(u, v)); + // Next, we can add program instructions that have effects. + + makeRoleReports(build, bc, v, program); + makeRoleInfixTriggers(build, bc, v, program); + makeRoleSuffix(build, bc, v, program); + makeRoleSetState(bc, v, program); + makeRoleGroups(g[v].groups, program); + + return program; +} + +static +void findRootEdges(const RoseBuildImpl &build, RoseVertex src, + map> &root_edges_map) { + const auto &g = build.g; + for (const auto &e : out_edges_range(src, g)) { + const auto &v = target(e, g); + if (build.hasDirectFinalId(v)) { + continue; // Skip direct reports. + } + for (auto lit_id : g[v].literals) { + assert(lit_id < build.literal_info.size()); + u32 final_id = build.literal_info.at(lit_id).final_id; + if (final_id != MO_INVALID_IDX) { + root_edges_map[final_id].insert(e); } - if (g[e].minBound == 0 && g[e].maxBound == ROSE_BOUND_INF - && (pseudo_delay_history || !tbi.maxLiteralOverlap(u, v))) { - tr.flags &= ROSE_ROLE_PRED_CLEAR_MASK; - tr.flags |= ROSE_ROLE_PRED_SIMPLE; - bc.rolePredecessors[g[v].role].push_back(g[u].role); + } + } +} + +static +void buildRootRolePrograms(RoseBuildImpl &build, build_context &bc, + vector &literalTable) { + const auto &g = build.g; + + map> root_edges_map; // lit id -> root edges + findRootEdges(build, build.root, root_edges_map); + findRootEdges(build, build.anchored_root, root_edges_map); + + for (u32 id = 0; id < literalTable.size(); id++) { + const auto &root_edges = root_edges_map[id]; + DEBUG_PRINTF("lit %u has %zu root edges\n", id, root_edges.size()); + + // Sort edges by (source, target) vertex indices to ensure + // deterministic program construction. 
+ vector ordered_edges(begin(root_edges), end(root_edges)); + sort(begin(ordered_edges), end(ordered_edges), + [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].idx, g[target(a, g)].idx) < + tie(g[source(b, g)].idx, g[target(b, g)].idx); + }); + + vector> root_prog; + for (const auto &e : ordered_edges) { + DEBUG_PRINTF("edge (%zu,%zu)\n", g[source(e, g)].idx, + g[target(e, g)].idx); + auto role_prog = makeRoleProgram(build, bc, e); + if (role_prog.empty()) { continue; } + root_prog.push_back(role_prog); } - assert(in_degree(v, g) >= 1); - tr.flags &= ROSE_ROLE_PRED_CLEAR_MASK; - tr.flags |= ROSE_ROLE_PRED_ANY; + RoseLiteral &tl = literalTable[id]; + if (root_prog.empty()) { + tl.rootProgramOffset = 0; + continue; + } - // Collect in-edges, ordered by the state index of the predecessor. - vector edges = make_vector_from(in_edges(v, g)); - sort(edges.begin(), edges.end(), - EdgeSourceStateCompare(g, bc.roleStateIndices)); + auto final_program = flattenRoleProgram(root_prog); + tl.rootProgramOffset = writeRoleProgram(bc, final_program); + } +} - for (const auto &e : edges) { - createPred(tbi, bc, e, predTable); +static +void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { + const auto &g = build.g; + + u32 state = 0; + + for (auto v : vertices_range(g)) { + // Virtual vertices (starts, EOD accept vertices) never need state + // indices. + if (build.isVirtualVertex(v)) { + continue; + } + // Leaf nodes don't need state indices, as they don't have successors. + if (isLeafNode(v, g)) { + continue; } + /* TODO: also don't need a state index if all edges are nfa based */ + bc.roleStateIndices.emplace(v, state++); } + + DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state, + num_vertices(g)); + bc.numStates = state; } static @@ -3606,8 +3320,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, u32 lagIndex = 0; - vector verts = get_ordered_verts(g); - for (RoseVertex v : verts) { + for (RoseVertex v : vertices_range(g)) { if (!g[v].left) { continue; } @@ -3683,60 +3396,188 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, *laggedRoseCount = lagIndex; } +static +void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, + vector &program) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED); + + u32 handled_key; + if (contains(bc.handledKeys, v)) { + handled_key = bc.handledKeys.at(v); + } else { + handled_key = verify_u32(bc.handledKeys.size()); + bc.handledKeys.emplace(v, handled_key); + } + + ri.u.checkNotHandled.key = handled_key; + + // This program may be triggered by different predecessors, with different + // offset bounds. We must ensure we put this check/set operation after the + // bounds check to deal with this case. + auto it = + find_if(begin(program), end(program), [](const RoseInstruction &ri) { + return ri.code() > ROSE_INSTR_CHECK_BOUNDS; + }); + program.insert(it, ri); +} + +static +vector makeSparseIterProgram(RoseBuildImpl &build, + build_context &bc, + const RoseEdge &e) { + const RoseGraph &g = build.g; + const RoseVertex v = target(e, g); + + auto program = makeRoleProgram(build, bc, e); + + if (hasGreaterInDegree(1, v, g)) { + // Only necessary when there is more than one pred. 
+ makeRoleCheckNotHandled(bc, v, program); + } + + return program; +} + +static +void buildLitSparseIter(RoseBuildImpl &build, build_context &bc, + vector &verts, RoseLiteral &tl) { + const auto &g = build.g; + + if (verts.empty()) { + // This literal has no non-root roles => no sparse iter + tl.iterOffset = ROSE_OFFSET_INVALID; + tl.iterProgramOffset = 0; + return; + } + + // Deterministic ordering. + sort(begin(verts), end(verts), + [&g](RoseVertex a, RoseVertex b) { return g[a].idx < g[b].idx; }); + + // pred state id -> list of programs + map>> predProgramLists; + + for (const auto &v : verts) { + DEBUG_PRINTF("vertex %zu\n", g[v].idx); + for (const auto &e : in_edges_range(v, g)) { + const auto &u = source(e, g); + if (build.isAnyStart(u)) { + continue; // Root roles are not handled with sparse iterator. + } + + assert(contains(bc.roleStateIndices, u)); + u32 pred_state = bc.roleStateIndices.at(u); + + DEBUG_PRINTF("pred %zu (state %u)\n", g[u].idx, pred_state); + + auto program = makeSparseIterProgram(build, bc, e); + predProgramLists[pred_state].push_back(program); + } + } + + map predPrograms; + for (const auto &e : predProgramLists) { + auto program = flattenRoleProgram(e.second); + u32 offset = writeRoleProgram(bc, program); + predPrograms.emplace(e.first, offset); + } + + tie(tl.iterProgramOffset, tl.iterOffset) = + addPredSparseIter(bc, predPrograms); +} + // Build sparse iterators for literals. static void buildSparseIter(RoseBuildImpl &build, build_context &bc, - vector &literalTable, - const vector &predTable) { + vector &literalTable) { const RoseGraph &g = build.g; - // Construct a mapping from role ids to state indices. - ue2::unordered_map role_to_state; - for (const auto &m : bc.roleStateIndices) { - role_to_state.emplace(g[m.first].role, m.second); + // Find all our non-root roles. + ue2::unordered_map> litNonRootVertices; + for (const auto &v : vertices_range(g)) { + if (build.isRootSuccessor(v)) { + continue; + } + for (const auto &lit_id : g[v].literals) { + u32 final_id = build.literal_info.at(lit_id).final_id; + litNonRootVertices[final_id].push_back(v); + } } for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) { - RoseLiteral &tl = literalTable[finalId]; + buildLitSparseIter(build, bc, litNonRootVertices[finalId], + literalTable[finalId]); + } +} + +static +vector makeEodAnchorProgram(RoseBuildImpl &build, + build_context &bc, + const RoseEdge &e) { + const RoseGraph &g = build.g; + const RoseVertex v = target(e, g); + + vector program; - if (!contains(bc.litNonRootRoles, finalId)) { - // This literal has no nonroot roles => no sparse iter - tl.iterOffset = ROSE_OFFSET_INVALID; - tl.iterMapOffset = ROSE_OFFSET_INVALID; + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + makeRoleCheckBounds(build, v, e, program); + } + + if (hasGreaterInDegree(1, v, g)) { + // Only necessary when there is more than one pred. 
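/*
 * [Editorial sketch, not part of the patch.] The intended semantics of the
 * new CHECK_NOT_HANDLED instruction: a role with several predecessors
 * appears in several pred programs, so its effects are guarded by a
 * test-and-set on a per-role handled key (t->handledKeyCount keys in
 * total). The vector<bool> below is a stand-in for whatever per-scan
 * handled state the interpreter keeps.
 */
#include <cstdint>
#include <vector>

static bool checkNotHandled(std::vector<bool> &handled, uint32_t key) {
    if (handled.at(key)) {
        return false; // effects already fired this scan: take fail_jump
    }
    handled.at(key) = true; // mark, then fall through to the role's effects
    return true;
}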
+ makeRoleCheckNotHandled(bc, v, program); + } + + for (const auto &report : g[v].reports) { + auto ri = RoseInstruction(ROSE_INSTR_REPORT_EOD); + ri.u.report.report = report; + program.push_back(ri); + } + + return program; +} + +/* returns a pair containing the iter map offset and iter offset */ +static +pair buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) { + const RoseGraph &g = build.g; + + // pred state id -> list of programs + map>> predProgramLists; + + for (auto v : vertices_range(g)) { + if (!g[v].eod_accept) { continue; } - const auto &roles = bc.litNonRootRoles.at(finalId); - assert(!roles.empty()); + DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].idx, + in_degree(v, g)); - // Collect the state IDs of the predecessors of the roles of this - // literal. + for (const auto &e : in_edges_range(v, g)) { + RoseVertex u = source(e, g); - // pred state id -> role/pred entries - map > predStates; + assert(contains(bc.roleStateIndices, u)); + u32 predStateIdx = bc.roleStateIndices.at(u); - for (u32 r : roles) { - const RoseRole &tr = bc.roleTable.at(r); - if (tr.flags & ROSE_ROLE_PRED_SIMPLE) { - u32 p = bc.rolePredecessors.at(r)[0]; - assert(p != ROSE_OFFSET_INVALID); - RoseIterRole ir = { r, ROSE_OFFSET_INVALID }; - assert(contains(role_to_state, p)); - predStates[role_to_state.at(p)].push_back(ir); - } else { - const vector &myPreds = bc.rolePredecessors.at(r); - for (u32 pred_entry : myPreds) { - u32 p = predTable.at(pred_entry).role; - RoseIterRole ir = { r, pred_entry }; - assert(p < bc.roleTable.size()); - assert(contains(role_to_state, p)); - predStates[role_to_state.at(p)].push_back(ir); - } - } + auto program = makeEodAnchorProgram(build, bc, e); + predProgramLists[predStateIdx].push_back(program); } + } + + if (predProgramLists.empty()) { + DEBUG_PRINTF("no eod anchored roles\n"); + return {0, 0}; + } - tie(tl.iterMapOffset, tl.iterOffset) = addPredSparseIter(bc, predStates); + map predPrograms; + for (const auto &e : predProgramLists) { + DEBUG_PRINTF("pred %u has %zu programs\n", e.first, e.second.size()); + auto program = flattenRoleProgram(e.second); + u32 offset = writeRoleProgram(bc, program); + predPrograms.emplace(e.first, offset); } + + return addPredSparseIter(bc, predPrograms); } static @@ -3885,7 +3726,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.depths = findDepths(*this); // Build NFAs - map suffixes; set no_retrigger_queues; bool mpv_as_outfix; prepMpv(*this, bc, &historyRequired, &mpv_as_outfix); @@ -3896,7 +3736,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 outfixEndQueue = qif.allocated_count(); u32 leftfixBeginQueue = outfixEndQueue; - if (!buildNfas(*this, bc, qif, &suffixes, &no_retrigger_queues, + if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &leftfixBeginQueue)) { return nullptr; } @@ -3913,14 +3753,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { assert(ISALIGNED_16(lit_benefits_size)); vector suffixEkeyLists; - buildSuffixEkeyLists(*this, bc, qif, suffixes, &suffixEkeyLists); - - buildInitialRoleTable(*this, bc, suffixes); - - DEBUG_PRINTF("roletable %zu\n", bc.roleTable.size()); + buildSuffixEkeyLists(*this, bc, qif, &suffixEkeyLists); - vector predTable; - buildPredTable(*this, bc, predTable); + assignStateIndices(*this, bc); u32 laggedRoseCount = 0; vector leftInfoTable; @@ -3930,13 +3765,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { vector literalTable; buildLiteralTable(*this, bc, literalTable); - 
buildSparseIter(*this, bc, literalTable, predTable); + buildSparseIter(*this, bc, literalTable); u32 eodIterOffset; - u32 eodIterMapOffset; - - tie(eodIterMapOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc, - predTable); + u32 eodProgramTableOffset; + tie(eodProgramTableOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc); vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); @@ -3944,13 +3777,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 lastByteOffset = buildLastByteIter(g, bc); // Enforce role table resource limit. - if (bc.roleTable.size() > cc.grey.limitRoseRoleCount) { + if (num_vertices(g) > cc.grey.limitRoseRoleCount) { throw ResourceLimitError(); } - // Write role programs into the engine blob. - writeRolePrograms(bc); - // Write root programs for literals into the engine blob. buildRootRolePrograms(*this, bc, literalTable); @@ -4010,10 +3840,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 literalLen = sizeof(RoseLiteral) * literalTable.size(); currOffset = literalOffset + literalLen; - u32 roleOffset = ROUNDUP_N(currOffset, alignof(RoseRole)); - u32 roleLen = sizeof(RoseRole) * bc.roleTable.size(); - currOffset = roleOffset + roleLen; - u32 leftOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo)); u32 roseLen = sizeof(LeftNfaInfo) * leftInfoTable.size(); currOffset = leftOffset + roseLen; @@ -4026,10 +3852,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 lookaroundTableLen = sizeof(s8) * bc.lookaround.size(); currOffset = lookaroundTableOffset + lookaroundTableLen; - u32 predOffset = ROUNDUP_N(currOffset, alignof(RosePred)); - u32 predLen = sizeof(RosePred) * predTable.size(); - currOffset = predOffset + predLen; - u32 nfaInfoOffset = ROUNDUP_N(currOffset, sizeof(u32)); u32 nfaInfoLen = sizeof(NfaInfo) * queue_count; currOffset = nfaInfoOffset + nfaInfoLen; @@ -4147,13 +3969,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->activeArrayCount = activeArrayCount; engine->activeLeftCount = activeLeftCount; engine->queueCount = queue_count; + engine->handledKeyCount = bc.handledKeys.size(); engine->group_weak_end = group_weak_end; engine->rolesWithStateCount = bc.numStates; - engine->roleOffset = roleOffset; - engine->roleCount = verify_u32(bc.roleTable.size()); engine->leftOffset = leftOffset; engine->roseCount = verify_u32(leftInfoTable.size()); engine->lookaroundTableOffset = lookaroundTableOffset; @@ -4162,8 +3983,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->outfixEndQueue = outfixEndQueue; engine->leftfixBeginQueue = leftfixBeginQueue; engine->initMpvNfa = mpv_as_outfix ? 
0 : MO_INVALID_IDX; - engine->predOffset = predOffset; - engine->predCount = verify_u32(predTable.size()); engine->stateSize = mmbit_size(bc.numStates); engine->anchorStateSize = anchorStateSize; engine->nfaInfoOffset = nfaInfoOffset; @@ -4173,7 +3992,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->multidirectOffset = multidirectOffset; engine->eodIterOffset = eodIterOffset; - engine->eodIterMapOffset = eodIterMapOffset; + engine->eodProgramTableOffset = eodProgramTableOffset; engine->lastByteHistoryIterOffset = lastByteOffset; @@ -4263,7 +4082,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { } NfaInfo *nfa_infos = (NfaInfo *)(ptr + nfaInfoOffset); - populateNfaInfoBasics(*this, bc, outfixes, suffixes, suffixEkeyLists, + populateNfaInfoBasics(*this, bc, outfixes, suffixEkeyLists, no_retrigger_queues, nfa_infos); updateNfaState(bc, &engine->stateOffsets, nfa_infos, &engine->scratchStateSize, &engine->nfaStateSize, @@ -4275,14 +4094,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Copy in other tables copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob); copy_bytes(ptr + engine->literalOffset, literalTable); - copy_bytes(ptr + engine->roleOffset, bc.roleTable); copy_bytes(ptr + engine->leftOffset, leftInfoTable); fillLookaroundTables(ptr + lookaroundTableOffset, ptr + lookaroundReachOffset, bc.lookaround); fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); - copy_bytes(ptr + engine->predOffset, predTable); copy_bytes(ptr + engine->anchoredReportMapOffset, art); copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit); copy_bytes(ptr + engine->multidirectOffset, mdr_reports); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index d69d28d62..d8048eee9 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -78,77 +78,6 @@ string to_string(nfa_kind k) { return "?"; } -// Get the RoseRole associated with a given vertex in the build graph from the -// RoseEngine. -static -const RoseRole *getRoseRole(const RoseBuildImpl &build, - const RoseEngine *engine, RoseVertex v) { - if (!engine) { - return nullptr; - } - - u32 role_idx = build.g[v].role; - if (role_idx == MO_INVALID_IDX) { - return nullptr; - } - - const RoseRole *roles = getRoleTable(engine); - return &roles[role_idx]; -} - -#define SKIP_CASE(name) \ - case ROSE_ROLE_INSTR_##name: { \ - const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \ - pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ - break; \ - } - -template -const Struct * -findInstruction(const RoseEngine *t, const RoseRole *role) { - if (!role->programOffset) { - return nullptr; - } - - const char *pc = (const char *)t + role->programOffset; - for (;;) { - u8 code = *(const u8 *)pc; - assert(code <= ROSE_ROLE_INSTR_END); - if (code == Opcode) { - return (const Struct *)pc; - } - // Skip to the next instruction. 
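/*
 * [Editorial sketch, not part of the patch.] The instruction-stream walk
 * shared by the removed findInstruction() above and the PROGRAM_CASE /
 * PROGRAM_NEXT_INSTRUCTION macros later in this patch: every instruction
 * begins with a one-byte opcode, structures are packed at
 * ROSE_INSTR_MIN_ALIGN boundaries, and END terminates the program. The
 * sizeOf callback stands in for the per-opcode sizeof(ROSE_STRUCT_...).
 */
#include <cstddef>
#include <cstdint>

static size_t roundUpAlign(size_t n, size_t align) {
    return (n + align - 1) / align * align;
}

static size_t countInstructions(const char *pc, size_t align,
                                size_t (*sizeOf)(uint8_t code),
                                uint8_t end_code) {
    size_t n = 0;
    for (;;) {
        uint8_t code = *(const uint8_t *)pc; // opcode is the first byte
        n++;
        if (code == end_code) {
            return n; // END: every program finishes with one
        }
        pc += roundUpAlign(sizeOf(code), align); // skip to next instruction
    }
}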
- switch (code) { - SKIP_CASE(ANCHORED_DELAY) - SKIP_CASE(CHECK_ONLY_EOD) - SKIP_CASE(CHECK_ROOT_BOUNDS) - SKIP_CASE(CHECK_LEFTFIX) - SKIP_CASE(CHECK_LOOKAROUND) - SKIP_CASE(SOM_ADJUST) - SKIP_CASE(SOM_LEFTFIX) - SKIP_CASE(TRIGGER_INFIX) - SKIP_CASE(TRIGGER_SUFFIX) - SKIP_CASE(REPORT) - SKIP_CASE(REPORT_CHAIN) - SKIP_CASE(REPORT_EOD) - SKIP_CASE(REPORT_SOM_INT) - SKIP_CASE(REPORT_SOM) - SKIP_CASE(REPORT_SOM_KNOWN) - SKIP_CASE(SET_STATE) - SKIP_CASE(SET_GROUPS) - case ROSE_ROLE_INSTR_END: - return nullptr; - default: - assert(0); - return nullptr; - } - } - - return nullptr; -} - -#undef SKIP_CASE - namespace { class RoseGraphWriter { @@ -174,7 +103,7 @@ class RoseGraphWriter { } os << "[label=\""; - os << "role=" << g[v].role << "[i" << g[v].idx <<"]\\n"; + os << "idx=" << g[v].idx <<"\\n"; for (u32 lit_id : g[v].literals) { writeLiteral(os, lit_id); @@ -198,34 +127,23 @@ class RoseGraphWriter { os << " (rep=" << as_string_list(g[v].reports) << ")"; } - const RoseRole *r = getRoseRole(v); - if (g[v].suffix) { os << "\\nSUFFIX (TOP " << g[v].suffix.top; - if (r) { - const auto *ri = - findInstruction(t, r); - if (ri) { - os << ", Q" << ri->queue; - } - } else { - // Can't dump the queue number, but we can identify the suffix. - if (g[v].suffix.graph) { - os << ", graph=" << g[v].suffix.graph.get() - << " " << to_string(g[v].suffix.graph->kind); - } - if (g[v].suffix.castle) { - os << ", castle=" << g[v].suffix.castle.get(); - } - if (g[v].suffix.rdfa) { - os << ", dfa=" << g[v].suffix.rdfa.get(); - } - if (g[v].suffix.haig) { - os << ", haig=" << g[v].suffix.haig.get(); - } - + // Can't dump the queue number, but we can identify the suffix. + if (g[v].suffix.graph) { + os << ", graph=" << g[v].suffix.graph.get() << " " + << to_string(g[v].suffix.graph->kind); + } + if (g[v].suffix.castle) { + os << ", castle=" << g[v].suffix.castle.get(); + } + if (g[v].suffix.rdfa) { + os << ", dfa=" << g[v].suffix.rdfa.get(); + } + if (g[v].suffix.haig) { + os << ", haig=" << g[v].suffix.haig.get(); } + os << ")"; } @@ -247,15 +165,6 @@ class RoseGraphWriter { build.isRootSuccessor(v) ? 
"PREFIX" : "INFIX"; os << "\\nROSE " << roseKind; os << " ("; - if (r) { - const auto *ri = - findInstruction(t, r); - if (ri) { - os << "Q" << ri->queue << ", "; - } - } - os << "report " << g[v].left.leftfix_report << ")"; if (g[v].left.graph) { @@ -348,10 +257,6 @@ class RoseGraphWriter { } } - const RoseRole *getRoseRole(RoseVertex v) const { - return ue2::getRoseRole(build, t, v); - } - set ghost; const RoseBuildImpl &build; const RoseEngine *t; @@ -383,7 +288,7 @@ namespace { struct CompareVertexRole { explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {} inline bool operator()(const RoseVertex &a, const RoseVertex &b) const { - return g[a].role < g[b].role; + return g[a].idx < g[b].idx; } private: const RoseGraph &g; @@ -483,7 +388,7 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { for (RoseVertex v : verts) { // role info - os << " Role " << g[v].role << ": depth=" << depths.at(v) + os << " Index " << g[v].idx << ": depth=" << depths.at(v) << ", groups=0x" << hex << setw(16) << setfill('0') << g[v].groups << dec; @@ -497,14 +402,14 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { os << ", max_offset=" << g[v].max_offset << endl; // pred info for (const auto &ie : in_edges_range(v, g)) { - os << " Predecessor role="; - u32 predRole = g[source(ie, g)].role; - if (predRole == MO_INVALID_IDX) { + const auto &u = source(ie, g); + os << " Predecessor idx="; + if (u == build.root) { os << "ROOT"; - } else if (predRole == g[build.anchored_root].role) { + } else if (u == build.anchored_root) { os << "ANCHORED_ROOT"; } else { - os << predRole; + os << g[u].idx; } os << ": bounds [" << g[ie].minBound << ", "; if (g[ie].maxBound == ROSE_BOUND_INF) { @@ -589,70 +494,6 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); } -static -CharReach bitvectorToReach(const u8 *reach) { - CharReach cr; - - for (size_t i = 0; i < 256; i++) { - if (reach[i / 8] & (1U << (i % 8))) { - cr.set(i); - - } - } - return cr; -} - -static -void dumpRoseLookaround(const RoseBuildImpl &build, const RoseEngine *t, - const Grey &grey, const string &filename) { - stringstream ss; - ss << grey.dumpPath << filename; - ofstream os(ss.str()); - - const RoseGraph &g = build.g; - - const u8 *base = (const u8 *)t; - const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const u8 *reach_base = base + t->lookaroundReachOffset; - - for (RoseVertex v : vertices_range(g)) { - const RoseRole *role = getRoseRole(build, t, v); - if (!role) { - continue; - } - - const auto *ri = - findInstruction(t, role); - if (!ri) { - continue; - } - - const u32 look_idx = ri->index; - const u32 look_count = ri->count; - - os << "Role " << g[v].role << endl; - os << " literals: " << as_string_list(g[v].literals) << endl; - os << " lookaround: index=" << look_idx << ", count=" << look_count - << endl; - - const s8 *look = look_base + look_idx; - const s8 *look_end = look + look_count; - const u8 *reach = reach_base + look_idx * REACH_BITVECTOR_LEN; - - for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) { - os << " " << std::setw(4) << std::setfill(' ') << int{*look} - << ": "; - describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); - os << endl; - } - - os << endl; - } - - os.close(); -} - void dumpRose(const RoseBuild &build_base, const RoseEngine *t, const Grey &grey) { if (!grey.dumpFlags) { @@ -692,9 +533,6 @@ void dumpRose(const RoseBuild &build_base, 
const RoseEngine *t, f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w"); roseDumpStructRaw(t, f); fclose(f); - - // Lookaround tables. - dumpRoseLookaround(build, t, grey, "rose_lookaround.txt"); } } // namespace ue2 diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 109c2d26c..9ec26d4c8 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -89,12 +89,10 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, next_nfa_report(0) { // add root vertices to graph g[root].idx = vertexIndex++; - g[root].role = MO_INVALID_IDX; g[root].min_offset = 0; g[root].max_offset = 0; g[anchored_root].idx = vertexIndex++; - g[anchored_root].role = MO_INVALID_IDX; g[anchored_root].min_offset = 0; g[anchored_root].max_offset = 0; } @@ -194,7 +192,7 @@ bool RoseBuildImpl::hasLiteralInTable(RoseVertex v, bool RoseBuildImpl::hasNoFloatingRoots() const { for (auto v : adjacent_vertices_range(root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("direct floating root %u\n", g[v].role); + DEBUG_PRINTF("direct floating root %zu\n", g[v].idx); return false; } } @@ -202,7 +200,7 @@ bool RoseBuildImpl::hasNoFloatingRoots() const { /* need to check if the anchored_root has any literals which are too deep */ for (auto v : adjacent_vertices_range(anchored_root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("indirect floating root %u\n", g[v].role); + DEBUG_PRINTF("indirect floating root %zu\n", g[v].idx); return false; } } diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 6ec890642..aa13a627c 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -40,7 +40,9 @@ #include "nfa/nfa_build_util.h" #include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" +#include "util/dump_charclass.h" #include "util/multibit_internal.h" +#include "util/multibit.h" #include #include @@ -114,159 +116,78 @@ const HWLM *getSmallBlockMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset); } -static -const RosePred *getPredTable(const RoseEngine *t, u32 *count) { - *count = t->predCount; - return (const RosePred *)loadFromByteCodeOffset(t, t->predOffset); -} - -static -u32 literalsWithDepth(const RoseEngine *t, u8 depth) { - u32 n = 0; - const RoseLiteral *tl = getLiteralTable(t); - const RoseLiteral *tl_end = tl + t->literalCount; - - for (; tl != tl_end; ++tl) { - if (tl->minDepth == depth) { - n++; - } - } - return n; -} - static u32 literalsWithDirectReports(const RoseEngine *t) { return t->totalNumLiterals - t->literalCount; } -template +template static -u32 literalsWithProp(const RoseEngine *t, member_type_ptr prop) { - u32 n = 0; +size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) { const RoseLiteral *tl = getLiteralTable(t); const RoseLiteral *tl_end = tl + t->literalCount; - for (; tl != tl_end; ++tl) { - if (tl->*prop) { - n++; - } - } - return n; + return count_if(tl, tl_end, pred); } -template static -u32 rolesWithPropValue(const RoseEngine *t, member_type RoseRole::*prop, - member_type value) { - u32 n = 0; - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; - - for (; tr != tr_end; ++tr) { - if (tr->*prop == value) { - n++; - } - } - return n; +size_t literalsWithDepth(const RoseEngine *t, u8 depth) { + return literalsWithPredicate( + t, [&depth](const RoseLiteral &l) { return l.minDepth == depth; }); } static -u32 literalsInGroups(const RoseEngine *t, u32 from, u32 to) { - u32 n = 0; - const RoseLiteral *tl = getLiteralTable(t); - 
const RoseLiteral *tl_end = tl + t->literalCount; - +size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) { rose_group mask = ~((1ULL << from) - 1); if (to < 64) { mask &= ((1ULL << to) - 1); } - for (; tl != tl_end; ++tl) { - if (tl->groups & mask) { - n++; - } - } - return n; + return literalsWithPredicate( + t, [&mask](const RoseLiteral &l) { return l.groups & mask; }); } static -u32 rolesWithFlag(const RoseEngine *t, u32 flag) { - u32 n = 0; - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; - - for (; tr != tr_end; ++tr) { - if (tr->flags & flag) { - n++; +CharReach bitvectorToReach(const u8 *reach) { + CharReach cr; + + for (size_t i = 0; i < 256; i++) { + if (reach[i / 8] & (1U << (i % 8))) { + cr.set(i); + } } - return n; + return cr; } -#define HANDLE_CASE(name) \ - case ROSE_ROLE_INSTR_##name: { \ - const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \ - pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ - break; \ - } - static -u32 rolesWithInstr(const RoseEngine *t, - enum RoseRoleInstructionCode find_code) { - u32 n = 0; - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; - - for (; tr != tr_end; ++tr) { - if (!tr->programOffset) { - continue; - } +void dumpLookaround(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_CHECK_LOOKAROUND *ri) { + assert(ri); - const char *pc = (const char *)t + tr->programOffset; - for (;;) { - u8 code = *(const u8 *)pc; - assert(code <= ROSE_ROLE_INSTR_END); - if (code == find_code) { - n++; - goto next_role; - } - switch (code) { - HANDLE_CASE(CHECK_ONLY_EOD) - HANDLE_CASE(CHECK_ROOT_BOUNDS) - HANDLE_CASE(CHECK_LOOKAROUND) - HANDLE_CASE(CHECK_LEFTFIX) - HANDLE_CASE(ANCHORED_DELAY) - HANDLE_CASE(SOM_ADJUST) - HANDLE_CASE(SOM_LEFTFIX) - HANDLE_CASE(TRIGGER_INFIX) - HANDLE_CASE(TRIGGER_SUFFIX) - HANDLE_CASE(REPORT) - HANDLE_CASE(REPORT_CHAIN) - HANDLE_CASE(REPORT_EOD) - HANDLE_CASE(REPORT_SOM_INT) - HANDLE_CASE(REPORT_SOM) - HANDLE_CASE(REPORT_SOM_KNOWN) - HANDLE_CASE(SET_STATE) - HANDLE_CASE(SET_GROUPS) - case ROSE_ROLE_INSTR_END: - goto next_role; - default: - assert(0); - return 0; - } - } - next_role:; + const u8 *base = (const u8 *)t; + const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); + const u8 *reach_base = base + t->lookaroundReachOffset; + + const s8 *look = look_base + ri->index; + const s8 *look_end = look + ri->count; + const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN; + + os << " contents:" << endl; + + for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) { + os << " " << std::setw(4) << std::setfill(' ') << int{*look} + << ": "; + describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); + os << endl; } - return n; } -#undef HANDLE_CASE - #define PROGRAM_CASE(name) \ - case ROSE_ROLE_INSTR_##name: { \ + case ROSE_INSTR_##name: { \ os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ - << ": " #name " (" << (int)ROSE_ROLE_INSTR_##name << ")" << endl; \ - const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; + << ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \ + const auto *ri = (const struct ROSE_STRUCT_##name *)pc; #define PROGRAM_NEXT_INSTRUCTION \ pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ @@ -274,11 +195,11 @@ u32 rolesWithInstr(const RoseEngine *t, } static -void dumpRoleProgram(ofstream &os, const char *pc) { +void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *pc_base = pc; for (;;) { u8 code = *(const u8 *)pc; 
- assert(code <= ROSE_ROLE_INSTR_END); + assert(code <= ROSE_INSTR_END); switch (code) { PROGRAM_CASE(ANCHORED_DELAY) { os << " depth " << u32{ri->depth} << endl; @@ -293,17 +214,24 @@ void dumpRoleProgram(ofstream &os, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_ROOT_BOUNDS) { + PROGRAM_CASE(CHECK_BOUNDS) { os << " min_bound " << ri->min_bound << endl; os << " max_bound " << ri->max_bound << endl; os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_NOT_HANDLED) { + os << " key " << ri->key << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_LOOKAROUND) { os << " index " << ri->index << endl; os << " count " << ri->count << endl; os << " fail_jump +" << ri->fail_jump << endl; + dumpLookaround(os, t, ri); } PROGRAM_NEXT_INSTRUCTION @@ -396,26 +324,27 @@ void dumpRoleProgram(ofstream &os, const char *pc) { #undef PROGRAM_NEXT_INSTRUCTION static -void dumpRoseRolePrograms(const RoseEngine *t, const string &filename) { - ofstream os(filename); - - const RoseRole *roles = getRoleTable(t); - const char *base = (const char *)t; +void dumpSparseIterPrograms(ofstream &os, const RoseEngine *t, u32 iterOffset, + u32 programTableOffset) { + const auto *it = + (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, iterOffset); + const u32 *programTable = + (const u32 *)loadFromByteCodeOffset(t, programTableOffset); - for (u32 i = 0; i < t->roleCount; i++) { - const RoseRole *role = &roles[i]; - os << "Role " << i << endl; + // Construct a full multibit. + const u32 total_bits = t->rolesWithStateCount; + const vector bits(mmbit_size(total_bits), u8{0xff}); - if (!role->programOffset) { - os << " " << endl; - continue; - } - - dumpRoleProgram(os, base + role->programOffset); - os << endl; + struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES]; + u32 idx = 0; + for (u32 i = mmbit_sparse_iter_begin(bits.data(), total_bits, &idx, it, s); + i != MMB_INVALID; + i = mmbit_sparse_iter_next(bits.data(), total_bits, i, &idx, it, s)) { + u32 programOffset = programTable[idx]; + os << "Sparse Iter Program " << idx << " triggered by state " << i + << " @ " << programOffset << ":" << endl; + dumpRoleProgram(os, t, (const char *)t + programOffset); } - - os.close(); } static @@ -427,12 +356,23 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { for (u32 i = 0; i < t->literalCount; i++) { const RoseLiteral *lit = &lits[i]; - if (!lit->rootProgramOffset) { - continue; + os << "Literal " << i << endl; + os << "---------------" << endl; + + if (lit->rootProgramOffset) { + os << "Root Program @ " << lit->rootProgramOffset << ":" << endl; + dumpRoleProgram(os, t, base + lit->rootProgramOffset); + } else { + os << "" << endl; + } + + if (lit->iterOffset != ROSE_OFFSET_INVALID) { + dumpSparseIterPrograms(os, t, lit->iterOffset, + lit->iterProgramOffset); + } else { + os << "" << endl; } - os << "Literal " << i << endl; - dumpRoleProgram(os, base + lit->rootProgramOffset); os << endl; } @@ -440,37 +380,17 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { } static -const char *historyName(RoseRoleHistory h) { - switch (h) { - case ROSE_ROLE_HISTORY_NONE: - return "history none"; - case ROSE_ROLE_HISTORY_ANCH: - return "history anch"; - case ROSE_ROLE_HISTORY_LAST_BYTE: - return "history last_byte"; - default: - return "unknown"; - } -} - -static -void dumpPreds(FILE *f, const RoseEngine *t) { - map counts; - - u32 predCount = 0; - const RosePred *tp = 
getPredTable(t, &predCount); - const RosePred *tp_end = tp + predCount; +void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); - for (; tp != tp_end; ++tp) { - assert(tp->historyCheck < ROSE_ROLE_HISTORY_INVALID); - counts[(RoseRoleHistory)tp->historyCheck] += 1; + if (t->eodIterOffset) { + dumpSparseIterPrograms(os, t, t->eodIterOffset, + t->eodProgramTableOffset); + } else { + os << "" << endl; } - for (map::const_iterator it = counts.begin(), - ite = counts.end(); - it != ite; ++it) { - fprintf(f, " - %-18s: %u\n", historyName(it->first), it->second); - } + os.close(); } static @@ -805,16 +725,12 @@ void roseDumpText(const RoseEngine *t, FILE *f) { sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance); fprintf(f, " - literal table : %zu bytes\n", t->literalCount * sizeof(RoseLiteral)); - fprintf(f, " - role table : %zu bytes\n", - t->roleCount * sizeof(RoseRole)); - fprintf(f, " - pred table : %zu bytes\n", - t->predCount * sizeof(RosePred)); fprintf(f, " - role state table : %zu bytes\n", t->rolesWithStateCount * sizeof(u32)); fprintf(f, " - nfa info table : %u bytes\n", t->anchoredReportMapOffset - t->nfaInfoOffset); fprintf(f, " - lookaround table : %u bytes\n", - t->predOffset - t->lookaroundTableOffset); + t->nfaInfoOffset - t->lookaroundTableOffset); fprintf(f, " - lookaround reach : %u bytes\n", t->lookaroundTableOffset - t->lookaroundReachOffset); @@ -839,46 +755,30 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "\n"); fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups); + fprintf(f, "handled key count : %u\n", t->handledKeyCount); fprintf(f, "\n"); fprintf(f, "number of literals : %u\n", t->totalNumLiterals); fprintf(f, " - delayed : %u\n", t->delay_count); fprintf(f, " - direct report : %u\n", literalsWithDirectReports(t)); - fprintf(f, " - that squash group : %u\n", - literalsWithProp(t, &RoseLiteral::squashesGroup)); + fprintf(f, " - that squash group : %zu\n", + literalsWithPredicate( + t, [](const RoseLiteral &l) { return l.squashesGroup != 0; })); fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id); - - u32 group_weak_end = t->group_weak_end; + fprintf(f, " - with root program : %zu\n", + literalsWithPredicate(t, [](const RoseLiteral &l) { + return l.rootProgramOffset != 0; + })); + fprintf(f, " - with sparse iter : %zu\n", + literalsWithPredicate(t, [](const RoseLiteral &l) { + return l.iterOffset != ROSE_OFFSET_INVALID; + })); fprintf(f, " - in groups ::\n"); - fprintf(f, " + weak : %u\n", - literalsInGroups(t, 0, group_weak_end)); - fprintf(f, " + general : %u\n", - literalsInGroups(t, group_weak_end, sizeof(u64a) * 8)); - fprintf(f, "number of roles : %u\n", t->roleCount); - fprintf(f, " - with state index : %u\n", t->rolesWithStateCount); - fprintf(f, " - with leftfix nfa : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LEFTFIX)); - fprintf(f, " - with suffix nfa : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_TRIGGER_SUFFIX)); - fprintf(f, " - with lookaround : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LOOKAROUND)); - fprintf(f, " - with reports : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT)); - fprintf(f, " - with som reports : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT_SOM_INT)); - fprintf(f, " - match only at end : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ONLY_EOD)); - fprintf(f, " + anchored : %u\n", t->anchoredMatches); - - fprintf(f, " - simple preds : %u\n", - rolesWithFlag(t, ROSE_ROLE_PRED_SIMPLE)); - fprintf(f, " - bound root preds : %u\n", - 
rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS)); - fprintf(f, " - 'any' preds : %u\n", - rolesWithFlag(t, ROSE_ROLE_PRED_ANY)); - fprintf(f, "number of preds : %u\n", t->predCount); - dumpPreds(f, t); + fprintf(f, " + weak : %zu\n", + literalsInGroups(t, 0, t->group_weak_end)); + fprintf(f, " + general : %zu\n", + literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8)); u32 depth1 = literalsWithDepth(t, 1); u32 depth2 = literalsWithDepth(t, 2); @@ -977,16 +877,13 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, activeArrayCount); DUMP_U32(t, activeLeftCount); DUMP_U32(t, queueCount); - DUMP_U32(t, roleOffset); - DUMP_U32(t, roleCount); - DUMP_U32(t, predOffset); - DUMP_U32(t, predCount); + DUMP_U32(t, handledKeyCount); DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); DUMP_U32(t, lookaroundTableOffset); DUMP_U32(t, lookaroundReachOffset); DUMP_U32(t, eodIterOffset); - DUMP_U32(t, eodIterMapOffset); + DUMP_U32(t, eodProgramTableOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); DUMP_U32(t, minWidthExcludingBoundaries); @@ -1048,52 +945,15 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); } -static -void roseDumpPredStructRaw(const RoseEngine *t, FILE *f) { - u32 pred_count = 0; - const RosePred *pred_table = getPredTable(t, &pred_count); - fprintf(f, "pred_count = %u\n", pred_count); - if (!pred_table) { - return; - } - - for (const RosePred *p = pred_table; p < pred_table + pred_count; p++) { - fprintf(f, "pred[%zu] = {\n", p - pred_table); - DUMP_U32(p, role); - DUMP_U32(p, minBound); - DUMP_U32(p, maxBound); - DUMP_U8(p, historyCheck); - fprintf(f, "}\n"); - } -} - -static -void roseDumpRoleStructRaw(const RoseEngine *t, FILE *f) { - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; - fprintf(f, "role_count = %zd\n", tr_end - tr); - if (!tr) { - return; - } - - for (const RoseRole *p = tr; p < tr_end; p++) { - fprintf(f, "role[%zu] = {\n", p - tr); - DUMP_U32(p, flags); - DUMP_U32(p, programOffset); - fprintf(f, "}\n"); - } -} - -void roseDumpComponents(const RoseEngine *t, bool dump_raw, const string &base) { +void roseDumpComponents(const RoseEngine *t, bool dump_raw, + const string &base) { dumpComponentInfo(t, base); dumpNfas(t, dump_raw, base); dumpAnchored(t, base); dumpRevComponentInfo(t, base); dumpRevNfas(t, dump_raw, base); - - // Role programs. 
- dumpRoseRolePrograms(t, base + "/rose_role_programs.txt"); - dumpRoseLitPrograms(t, base + "/rose_lit_root_programs.txt"); + dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt"); + dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); } void roseDumpInternals(const RoseEngine *t, const string &base) { @@ -1139,14 +999,6 @@ void roseDumpInternals(const RoseEngine *t, const string &base) { roseDumpStructRaw(t, f); fclose(f); - f = fopen((base + "/rose_preds.txt").c_str(), "w"); - roseDumpPredStructRaw(t, f); - fclose(f); - - f = fopen((base + "/rose_roles.txt").c_str(), "w"); - roseDumpRoleStructRaw(t, f); - fclose(f); - roseDumpComponents(t, true, base); } diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h index e29fd2dd0..b0ac8d11b 100644 --- a/src/rose/rose_graph.h +++ b/src/rose/rose_graph.h @@ -39,7 +39,7 @@ #include "ue2common.h" #include "rose_build.h" -#include "rose_internal.h" /* role history, etc */ +#include "rose_internal.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "util/charreach.h" #include "util/depth.h" @@ -65,6 +65,14 @@ enum rose_literal_table { ROSE_EVENT //!< "literal-like" events, such as EOD }; +/** \brief Edge history types. */ +enum RoseRoleHistory { + ROSE_ROLE_HISTORY_NONE, //!< no special history + ROSE_ROLE_HISTORY_ANCH, //!< previous role is at a fixed offset + ROSE_ROLE_HISTORY_LAST_BYTE, //!< previous role can only match at EOD + ROSE_ROLE_HISTORY_INVALID //!< history not yet assigned +}; + #include "util/order_check.h" /** \brief Provides information about the (pre|in)fix engine to the left of a @@ -140,9 +148,6 @@ struct RoseVertexProps { /** \brief Report IDs to fire. */ flat_set reports; - /** \brief Role ID for this vertex. These are what end up in the bytecode. */ - u32 role = ~u32{0}; - /** \brief Bitmask of groups that this role sets. */ rose_group groups = 0; diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 00e62eb96..7aae2f22c 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -73,18 +73,55 @@ ReportID literalToReport(u32 id) { return id & ~LITERAL_DR_FLAG; } -// Structure representing a literal. Each literal may have many roles. +/** \brief Structure representing a literal. */ struct RoseLiteral { - u32 rootProgramOffset; // role program to run for root roles. - u32 iterOffset; // offset of sparse iterator, relative to rose - u32 iterMapOffset; // offset of the iter mapping table, relative to rose - rose_group groups; // bitset of groups that cause this literal to fire. - u8 minDepth; // the minimum of this literal's roles' depths (for depths > 1) - u8 squashesGroup; /**< literal switches off its group behind it if it sets a - * role */ - u32 delay_mask; /**< bit set indicates that the literal inserts a delayed - * match at the given offset */ - u32 delayIdsOffset; // offset to array of ids to poke in the delay structure + /** + * \brief Role program to run unconditionally when this literal is seen. + * + * Offset is relative to RoseEngine, or zero for no program. + */ + u32 rootProgramOffset; + + /** + * \brief Offset of sparse iterator (mmbit_sparse_iter pointer) over + * predecessor states. + * + * Offset is relative to RoseEngine, set to ROSE_OFFSET_INVALID for no + * iterator. + */ + u32 iterOffset; + + /** + * \brief Table of role programs to run when triggered by the sparse + * iterator, indexed by dense sparse iter index. + * + * Offset is relative to RoseEngine, zero for no programs. 
+ */ + u32 iterProgramOffset; + + /** \brief Bitset of groups that cause this literal to fire. */ + rose_group groups; + + /** + * \brief The minimum depth of this literal in the Rose graph (for depths + * greater than 1). + */ + u8 minDepth; + + /** + * \brief True if this literal switches off its group behind it when it + * sets a role. + */ + u8 squashesGroup; + + /** + * \brief Bitset which indicates that the literal inserts a delayed + * match at the given offset. + */ + u32 delay_mask; + + /** \brief Offset to array of ids to poke in the delay structure. */ + u32 delayIdsOffset; }; /* Allocation of Rose literal ids @@ -179,15 +216,6 @@ struct RoseLiteral { * terminals. */ -// We have different types of role history storage. -enum RoseRoleHistory { - ROSE_ROLE_HISTORY_NONE, // I'm sorry, I don't recall. - ROSE_ROLE_HISTORY_ANCH, // used when previous role is at a fixed offset - ROSE_ROLE_HISTORY_LAST_BYTE, /* used when previous role can only match at the - * last byte of a stream */ - ROSE_ROLE_HISTORY_INVALID // history not yet assigned -}; - struct RoseCountingMiracle { char shufti; /** 1: count shufti class; 0: count a single character */ u8 count; /** minimum number of occurrences for the counting @@ -225,15 +253,6 @@ struct NfaInfo { * matches */ }; -/* We allow different types of role-predecessor relationships. These are stored - * in with the flags */ -#define ROSE_ROLE_PRED_SIMPLE (1U << 21) /**< single [0,inf] pred, no - * offset tracking */ -#define ROSE_ROLE_PRED_ANY (1U << 23) /**< any of our preds can match */ - -#define ROSE_ROLE_PRED_CLEAR_MASK \ - (~(ROSE_ROLE_PRED_SIMPLE | ROSE_ROLE_PRED_ANY)) - #define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one * whole byte (OWB) (streaming only). Other * values in OWB are reserved for zombie @@ -241,33 +260,6 @@ struct NfaInfo { #define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose * prefix checks */ -// Structure representing a literal role. -struct RoseRole { - u32 flags; - u32 programOffset; /**< offset to program to run. */ -}; - -// Structure representing a predecessor relationship -struct RosePred { - u32 role; // index of predecessor role - u32 minBound; // min bound on distance from pred (_ANCH ->absolute offset) - u32 maxBound; /* max bound on distance from pred, or ROSE_BOUND_INF - * (_ANCH -> absolute offset ) */ - u8 historyCheck; // from enum RoseRoleHistory -}; - -// Structure mapping between the dense index produced by the literal sparse -// iterator and a list of roles. -struct RoseIterMapping { - u32 offset; // offset into iter role table - u32 count; // number of roles -}; - -struct RoseIterRole { - u32 role; - u32 pred; -}; - /** * \brief Rose state offsets. * @@ -376,8 +368,6 @@ struct RoseBoundaryReports { // 1c. eod-anchored literal matcher table // 1d. small block table // 2. array of RoseLiteral (literalCount entries) -// 3. array of RoseRole (roleCount entries) -// 4. array of RosePred (predCount entries) // 8. array of NFA offsets, one per queue // 9. array of state offsets, one per queue (+) // 10. 
array of role ids for the set of all root roles @@ -447,10 +437,10 @@ struct RoseEngine { u32 activeArrayCount; //number of nfas tracked in the active array u32 activeLeftCount; //number of nfas tracked in the active rose array u32 queueCount; /**< number of nfa queues */ - u32 roleOffset; // offset of RoseRole array (bytes) - u32 roleCount; // number of RoseRole entries - u32 predOffset; // offset of RosePred array (bytes) - u32 predCount; // number of RosePred entries + + /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role + * programs. Used to size the handled_roles fatbit in scratch. */ + u32 handledKeyCount; u32 leftOffset; u32 roseCount; @@ -459,7 +449,7 @@ struct RoseEngine { * bytes each) */ u32 eodIterOffset; // or 0 if no eod iterator - u32 eodIterMapOffset; + u32 eodProgramTableOffset; u32 lastByteHistoryIterOffset; // if non-zero @@ -614,22 +604,6 @@ const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) { return tl; } -static really_inline -const struct RoseRole *getRoleTable(const struct RoseEngine *t) { - const struct RoseRole *r - = (const struct RoseRole *)((const char *)t + t->roleOffset); - assert(ISALIGNED_N(r, 4)); - return r; -} - -static really_inline -const struct RosePred *getPredTable(const struct RoseEngine *t) { - const struct RosePred *p - = (const struct RosePred *)((const char *)t + t->predOffset); - assert(ISALIGNED_N(p, 4)); - return p; -} - static really_inline const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) { const struct LeftNfaInfo *r diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 40f013ca2..ee747b9d6 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -40,54 +40,61 @@ #define ROSE_INSTR_MIN_ALIGN 8U /** \brief Role program instruction opcodes. */ -enum RoseRoleInstructionCode { - ROSE_ROLE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. - ROSE_ROLE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. - ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS, //!< Bounds on distance from root. - ROSE_ROLE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. - ROSE_ROLE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. - ROSE_ROLE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. - ROSE_ROLE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. - ROSE_ROLE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. - ROSE_ROLE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. - ROSE_ROLE_INSTR_REPORT, //!< Fire an ordinary report. - ROSE_ROLE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). - ROSE_ROLE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time. - ROSE_ROLE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. - ROSE_ROLE_INSTR_REPORT_SOM, //!< Manipulate SOM and report. - ROSE_ROLE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. - ROSE_ROLE_INSTR_SET_STATE, //!< Switch a state index on. - ROSE_ROLE_INSTR_SET_GROUPS, //!< Set some literal group bits. - ROSE_ROLE_INSTR_END //!< End of program. -}; - -struct ROSE_ROLE_STRUCT_ANCHORED_DELAY { +enum RoseInstructionCode { + ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. + ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. + ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". + ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. + ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. + ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. 
+ ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. + ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. + ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. + ROSE_INSTR_REPORT, //!< Fire an ordinary report. + ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). + ROSE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time. + ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. + ROSE_INSTR_REPORT_SOM, //!< Manipulate SOM and report. + ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. + ROSE_INSTR_SET_STATE, //!< Switch a state index on. + ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. + ROSE_INSTR_END //!< End of program. +}; + +struct ROSE_STRUCT_ANCHORED_DELAY { u8 code; //!< From enum RoseRoleInstructionCode. u8 depth; //!< Depth for this state. rose_group groups; //!< Bitmask. u32 done_jump; //!< Jump forward this many bytes if successful. }; -struct ROSE_ROLE_STRUCT_CHECK_ONLY_EOD { +struct ROSE_STRUCT_CHECK_ONLY_EOD { u8 code; //!< From enum RoseRoleInstructionCode. u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS { +struct ROSE_STRUCT_CHECK_BOUNDS { u8 code; //!< From enum RoseRoleInstructionCode. u32 min_bound; //!< Min distance from zero. u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF). u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_ROLE_STRUCT_CHECK_LOOKAROUND { +struct ROSE_STRUCT_CHECK_NOT_HANDLED { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 key; //!< Key in the "handled_roles" fatbit in scratch. + u32 fail_jump; //!< Jump forward this many bytes if we have seen key before. +}; + +struct ROSE_STRUCT_CHECK_LOOKAROUND { u8 code; //!< From enum RoseRoleInstructionCode. u32 index; u32 count; u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX { +struct ROSE_STRUCT_CHECK_LEFTFIX { u8 code; //!< From enum RoseRoleInstructionCode. u32 queue; //!< Queue of leftfix to check. u32 lag; //!< Lag of leftfix for this case. @@ -95,72 +102,72 @@ struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX { u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_ROLE_STRUCT_SOM_ADJUST { +struct ROSE_STRUCT_SOM_ADJUST { u8 code; //!< From enum RoseRoleInstructionCode. u32 distance; //!< Distance to EOM. }; -struct ROSE_ROLE_STRUCT_SOM_LEFTFIX { +struct ROSE_STRUCT_SOM_LEFTFIX { u8 code; //!< From enum RoseRoleInstructionCode. u32 queue; //!< Queue index of leftfix providing SOM. u32 lag; //!< Lag of leftfix for this case. }; -struct ROSE_ROLE_STRUCT_TRIGGER_INFIX { +struct ROSE_STRUCT_TRIGGER_INFIX { u8 code; //!< From enum RoseRoleInstructionCode. u8 cancel; //!< Cancels previous top event. u32 queue; //!< Queue index of infix. u32 event; //!< Queue event, from MQE_*. }; -struct ROSE_ROLE_STRUCT_TRIGGER_SUFFIX { +struct ROSE_STRUCT_TRIGGER_SUFFIX { u8 code; //!< From enum RoseRoleInstructionCode. u32 queue; //!< Queue index of suffix. u32 event; //!< Queue event, from MQE_*. }; -struct ROSE_ROLE_STRUCT_REPORT { +struct ROSE_STRUCT_REPORT { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_CHAIN { +struct ROSE_STRUCT_REPORT_CHAIN { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_EOD { +struct ROSE_STRUCT_REPORT_EOD { u8 code; //!< From enum RoseRoleInstructionCode. 
ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_SOM_INT { +struct ROSE_STRUCT_REPORT_SOM_INT { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_SOM { +struct ROSE_STRUCT_REPORT_SOM { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN { +struct ROSE_STRUCT_REPORT_SOM_KNOWN { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_SET_STATE { +struct ROSE_STRUCT_SET_STATE { u8 code; //!< From enum RoseRoleInstructionCode. u8 depth; //!< Depth for this state. u32 index; //!< State index in multibit. }; -struct ROSE_ROLE_STRUCT_SET_GROUPS { +struct ROSE_STRUCT_SET_GROUPS { u8 code; //!< From enum RoseRoleInstructionCode. rose_group groups; //!< Bitmask. }; -struct ROSE_ROLE_STRUCT_END { +struct ROSE_STRUCT_END { u8 code; //!< From enum RoseRoleInstructionCode. }; diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 2a87e3eb3..d71c32d63 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -172,15 +172,6 @@ const struct internal_report *getInternalReport(const struct RoseEngine *t, return reports + intId; } -static really_inline -const struct RoseRole *getRoleByOffset(const struct RoseEngine *t, u32 offset) { - const struct RoseRole *tr = (const void *)((const char *)t + offset); - - assert((size_t)(tr - getRoleTable(t)) < t->roleCount); - DEBUG_PRINTF("get root role %zu\n", tr - getRoleTable(t)); - return tr; -} - #define ANCHORED_MATCH_SENTINEL (~0U) static really_inline diff --git a/src/scratch.c b/src/scratch.c index b0888fdb2..30241ab49 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -90,7 +90,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { + bStateSize + tStateSize + fullStateSize + 63 /* cacheline padding */ + nfa_context_size - + fatbit_size(proto->roleCount) /* handled roles */ + + fatbit_size(proto->handledKeyCount) /* handled roles */ + fatbit_size(queueCount) /* active queue array */ + 2 * fatbit_size(deduperCount) /* need odd and even logs */ + 2 * fatbit_size(deduperCount) /* ditto som logs */ @@ -192,7 +192,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { current += fatbit_size(queueCount); s->handled_roles = (struct fatbit *)current; - current += fatbit_size(proto->roleCount); + current += fatbit_size(proto->handledKeyCount); s->deduper.log[0] = (struct fatbit *)current; current += fatbit_size(deduperCount); @@ -312,9 +312,9 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { proto->delay_count = rose->delay_count; } - if (rose->roleCount > proto->roleCount) { + if (rose->handledKeyCount > proto->handledKeyCount) { resize = 1; - proto->roleCount = rose->roleCount; + proto->handledKeyCount = rose->handledKeyCount; } if (rose->tStateSize > proto->tStateSize) { diff --git a/src/scratch.h b/src/scratch.h index 1d329bda0..07e725117 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -180,7 +180,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 delay_count; u32 scratchSize; u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE]; - u32 roleCount; + u32 handledKeyCount; struct fatbit *handled_roles; /**< mmbit of ROLES (not states) already * handled by this literal */ u64a *som_store; /**< array of som locations */ From 86a52971caebe792cf759741604a1eb4d450928f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 10 Dec 2015 09:04:36 +1100 Subject: [PATCH 009/218] Remove dead code: EdgeSourceStateCompare --- 
 src/rose/rose_build_bytecode.cpp | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 6b6e443fd..708d3c8a8 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -115,29 +115,6 @@ namespace ue2 {
 
 namespace /* anon */ {
 
-// Orders RoseEdge edges by the state index of the source node
-struct EdgeSourceStateCompare {
-    EdgeSourceStateCompare(
-        const RoseGraph &g_,
-        const ue2::unordered_map<RoseVertex, u32> &roleStateIndices_)
-        : g(g_), roleStateIndices(roleStateIndices_) {}
-
-    u32 state_index(RoseVertex v) const {
-        auto it = roleStateIndices.find(v);
-        if (it != roleStateIndices.end()) {
-            return it->second;
-        }
-        return MMB_INVALID;
-    }
-
-    bool operator()(const RoseEdge &a, const RoseEdge &b) const {
-        return state_index(source(a, g)) < state_index(source(b, g));
-    }
-
-    const RoseGraph &g;
-    const ue2::unordered_map<RoseVertex, u32> &roleStateIndices;
-};
-
 struct left_build_info {
     // Constructor for an engine implementation.
     left_build_info(u32 q, u32 l, u32 t, rose_group sm,

From 326abeb3ee05d3c3dbf818c8393e466e8d961855 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 10 Dec 2015 15:35:12 +1100
Subject: [PATCH 010/218] Perform an early removeRedundancy call on graph

This allows sibling character classes to be merged together before graph
component splitting is done by calcComponents(). In particular, this
transforms (A|a)(B|b)(C|c) into [Aa][Bb][Cc] earlier.
---
 src/nfagraph/ng.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp
index 7f866a66a..758841819 100644
--- a/src/nfagraph/ng.cpp
+++ b/src/nfagraph/ng.cpp
@@ -402,6 +402,13 @@ bool NG::addGraph(NGWrapper &w) {
 
     dumpDotWrapper(w, "03_early", cc.grey);
 
+    // Perform a reduction pass to merge sibling character classes together.
+    if (cc.grey.performGraphSimplification) {
+        removeRedundancy(w, som);
+    }
+
+    dumpDotWrapper(w, "04_reduced", cc.grey);
+
     // If we've got some literals that span the graph from start to accept, we
     // can split them off into Rose from here.
     if (!som) {

From db4176c13ed030fbc64111950b25e68f9aa07773 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 15 Dec 2015 10:05:22 +1100
Subject: [PATCH 011/218] convertAnchPrefixToBounds: check size of delay_adj

Avoid subtracting delay_adj from a smaller max bound.
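For illustration, a minimal standalone sketch of the failure mode this
check prevents (hypothetical values; the real code subtracts delay_adj
from DepthMinMax/depth bounds, which carry their own sanity checks, so
plain unsigned arithmetic is used here only to make the wraparound
visible):

    #include <assert.h>

    int main(void) {
        unsigned int max_bound = 3; /* stands in for pr.bounds.max */
        unsigned int delay_adj = 5; /* stands in for the literal delay */

        /* Without the check, the unsigned subtraction below would wrap
         * to a huge value, yielding a nonsensical max bound. */
        if (delay_adj >= max_bound) {
            return 0; /* skip the conversion, as the patch now does */
        }

        max_bound -= delay_adj;
        assert(max_bound <= 3); /* bound remains sane */
        return 0;
    }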
---
 src/rose/rose_build_convert.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp
index 2ce211bf5..f5e99c232 100644
--- a/src/rose/rose_build_convert.cpp
+++ b/src/rose/rose_build_convert.cpp
@@ -1098,6 +1098,11 @@ void convertAnchPrefixToBounds(RoseBuildImpl &tbi) {
         DEBUG_PRINTF("castle has repeat %s\n", pr.bounds.str().c_str());
         DEBUG_PRINTF("delay adj %u\n", (u32)delay_adj);
 
+        if (delay_adj >= pr.bounds.max) {
+            DEBUG_PRINTF("delay adj too large\n");
+            continue;
+        }
+
         DepthMinMax bounds(pr.bounds); // copy
 
         if (delay_adj > bounds.min) {
             bounds.min = 0;

From 8069e99beeae357e62e10a18711e03b91dc6b7dd Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 15 Dec 2015 10:10:59 +1100
Subject: [PATCH 012/218] make_disjoint: Remove dead code

---
 src/util/charreach_util.h | 36 ++----------------------------------
 1 file changed, 2 insertions(+), 34 deletions(-)

diff --git a/src/util/charreach_util.h b/src/util/charreach_util.h
index 47f76a3f3..f0dc4227b 100644
--- a/src/util/charreach_util.h
+++ b/src/util/charreach_util.h
@@ -29,43 +29,11 @@
 #ifndef CHARREACH_UTIL_H
 #define CHARREACH_UTIL_H
 
-#include <map>
-#include <set>
-
-#include "charreach.h"
+#include "ue2common.h"
 
 namespace ue2 {
 
-template <class T>
-std::map<CharReach, std::set<T> >
-make_disjoint(const std::map<CharReach, std::set<T> > &in) {
-    using namespace std;
-
-    map<size_t, set<T> > by_char;
-    for (typename map<CharReach, set<T> >::const_iterator it = in.begin();
-         it != in.end(); ++it) {
-        const CharReach &cr = it->first;
-        for (size_t j = cr.find_first(); j != CharReach::npos;
-             j = cr.find_next(j)) {
-            by_char[j].insert(it->second.begin(), it->second.end());
-        }
-    }
-
-    map<set<T>, CharReach> rev;
-    for (typename map<size_t, set<T> >::const_iterator it = by_char.begin();
-         it != by_char.end(); ++it) {
-        rev[it->second].set(it->first);
-    }
-
-    map<CharReach, set<T> > out;
-    for (typename map<set<T>, CharReach>::const_iterator it = rev.begin();
-         it != rev.end(); ++it) {
-        assert(out.find(it->second) == out.end());
-        out[it->second] = it->first;
-    }
-
-    return out;
-}
+class CharReach;
 
 void make_caseless(CharReach *cr);

From b2ebdac642d4d1b16fcbdea7ce9d300d19488f67 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 10 Dec 2015 11:41:47 +1100
Subject: [PATCH 013/218] rose: Extend program to handle literals, iterators

- cleanups
- add sparse iter instructions
- merge "root" and "sparse iter" programs together
- move program execution to new file program_runtime.h
- simplify EOD execution
---
 CMakeLists.txt                   |    1 +
 src/rose/eod.c                   |   84 +--
 src/rose/match.c                 | 1173 +-----------------------------
 src/rose/match.h                 |    5 -
 src/rose/program_runtime.h       | 1081 +++++++++++++++++++++++++++
 src/rose/rose.h                  |   48 +-
 src/rose/rose_build_bytecode.cpp |  415 ++++++-----
 src/rose/rose_build_compile.cpp  |    7 +
 src/rose/rose_build_dump.cpp     |    1 -
 src/rose/rose_dump.cpp           |  116 ++-
 src/rose/rose_internal.h         |   34 +-
 src/rose/rose_program.h          |   70 +-
 12 files changed, 1533 insertions(+), 1502 deletions(-)
 create mode 100644 src/rose/program_runtime.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f10e5cb5e..4034b14ba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -467,6 +467,7 @@ set (hs_exec_SRCS
     src/rose/match.h
     src/rose/match.c
     src/rose/miracle.h
+    src/rose/program_runtime.h
     src/rose/runtime.h
     src/rose/rose.h
     src/rose/rose_internal.h
diff --git a/src/rose/eod.c b/src/rose/eod.c
index dec07b546..014b51ca9 100644
--- a/src/rose/eod.c
+++ b/src/rose/eod.c
@@ -28,6 +28,7 @@
 
 #include "catchup.h"
 #include "match.h"
+#include "program_runtime.h"
 #include "rose.h"
 #include "util/fatbit.h"
 
@@ -107,43 +108,18 @@ hwlmcb_rv_t
roseEodRunMatcher(const struct RoseEngine *t, u64a offset, } static rose_inline -int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset, +int roseEodRunIterator(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch) { - if (!t->eodIterOffset) { + if (!t->eodIterProgramOffset) { return MO_CONTINUE_MATCHING; } - DEBUG_PRINTF("running eod iterator at offset %u\n", t->eodIterOffset); + DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset); - const u32 *programTable = getByOffset(t, t->eodProgramTableOffset); - const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset); - assert(ISALIGNED(programTable)); - assert(ISALIGNED(it)); - - // Sparse iterator state was allocated earlier - struct mmbit_sparse_state *s = scratch->sparse_iter_state; - struct fatbit *handled_roles = scratch->handled_roles; - - const u32 numStates = t->rolesWithStateCount; - - void *role_state = getRoleState(state); - u32 idx = 0; - u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s); - - fatbit_clear(handled_roles); - - int work_done = 0; // not read from in this path. - - for (; i != MMB_INVALID; - i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { - DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx); - u32 programOffset = programTable[idx]; - u64a som = 0; - if (roseRunRoleProgram(t, programOffset, offset, &som, - &(scratch->tctxt), - &work_done) == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } + int work_done = 0; + if (roseRunProgram(t, t->eodIterProgramOffset, offset, &(scratch->tctxt), 0, + &work_done) == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; } return MO_CONTINUE_MATCHING; @@ -236,6 +212,27 @@ void roseCheckEodSuffixes(const struct RoseEngine *t, u8 *state, u64a offset, } } +static rose_inline +int roseRunEodProgram(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { + if (!t->eodProgramOffset) { + return MO_CONTINUE_MATCHING; + } + + DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset); + + // There should be no pending delayed literals. + assert(!scratch->tctxt.filledDelayedSlots); + + int work_done = 0; + if (roseRunProgram(t, t->eodProgramOffset, offset, &scratch->tctxt, 0, + &work_done) == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + + return MO_CONTINUE_MATCHING; +} + static really_inline void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, struct hs_scratch *scratch, const char is_streaming) { @@ -244,31 +241,20 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, assert(!scratch->core_info.buf || !scratch->core_info.hbuf); assert(!can_stop_matching(scratch)); - // Fire the special EOD event literal. - if (t->hasEodEventLiteral) { - DEBUG_PRINTF("firing eod event id %u at offset %llu\n", - t->eodLiteralId, offset); - const struct core_info *ci = &scratch->core_info; - size_t len = ci->buf ? ci->len : ci->hlen; - assert(len || !ci->buf); /* len may be 0 if no history is required - * (bounds checks only can lead to this) */ - - roseRunEvent(len, t->eodLiteralId, &scratch->tctxt); - if (can_stop_matching(scratch)) { - DEBUG_PRINTF("user told us to stop\n"); - return; - } + // Run the unconditional EOD program. 
+ if (roseRunEodProgram(t, offset, scratch) == MO_HALT_MATCHING) { + return; } roseCheckNfaEod(t, state, scratch, offset, is_streaming); - if (!t->eodIterOffset && !t->ematcherOffset) { + if (!t->eodIterProgramOffset && !t->ematcherOffset) { DEBUG_PRINTF("no eod accepts\n"); return; } // Handle pending EOD reports. - int itrv = roseEodRunIterator(t, state, offset, scratch); + int itrv = roseEodRunIterator(t, offset, scratch); if (itrv == MO_HALT_MATCHING) { return; } @@ -288,7 +274,7 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, cleanupAfterEodMatcher(t, state, offset, scratch); // Fire any new EOD reports. - roseEodRunIterator(t, state, offset, scratch); + roseEodRunIterator(t, offset, scratch); roseCheckEodSuffixes(t, state, offset, scratch); } diff --git a/src/rose/match.c b/src/rose/match.c index 591abcfb3..1c688aabb 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -31,6 +31,7 @@ #include "infix.h" #include "match.h" #include "miracle.h" +#include "program_runtime.h" #include "rose_program.h" #include "rose.h" #include "som/som_runtime.h" @@ -219,8 +220,7 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx\n", - id, tl->minDepth, tl->groups); + DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups); pushDelayedMatches(tl, real_end, tctx); @@ -230,86 +230,6 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, return tctx->groups; } -static really_inline -hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, s64a loc, - char is_mpv, char in_anchored, - char in_catchup) { - struct RoseContext *tctxt = &scratch->tctxt; - u8 *aa = getActiveLeafArray(t, tctxt->state); - struct fatbit *activeQueues = scratch->aqa; - u32 aaCount = t->activeArrayCount; - u32 qCount = t->queueCount; - - struct mq *q = &scratch->queues[qi]; - DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n", - q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset); - if (q_cur_loc(q) == loc) { - /* too many tops enqueued at the one spot; need to flatten this queue. - * We can use the full catchups as it will short circuit as we are - * already at this location. 
It also saves waking everybody up */ - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExec(q->nfa, q, loc); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else if (!in_catchup) { - if (is_mpv) { - tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (loc + scratch->core_info.buf_offset - <= tctxt->minNonMpvMatchOffset) { - DEBUG_PRINTF("flushing chained\n"); - if (roseCatchUpMPV(t, tctxt->state, loc, scratch) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - goto done_queue_empty; - } - } - - if (roseCatchUpTo(t, tctxt->state, loc + scratch->core_info.buf_offset, - scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } else { - /* we must be a chained nfa */ - assert(is_mpv); - DEBUG_PRINTF("flushing chained\n"); - tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (roseCatchUpMPV(t, tctxt->state, loc, scratch) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } -done_queue_empty: - if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, tctxt); - nfaQueueInitState(q->nfa, q); - pushQueueAt(q, 0, MQE_START, loc); - fatbit_set(activeQueues, qCount, qi); - } - - assert(!isQueueFull(q)); - - if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { - if (!scratch->core_info.broken) { - scratch->core_info.broken = BROKEN_EXHAUSTED; - } - tctxt->groups = 0; - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; -} - -static really_inline -hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, s64a loc, - char in_anchored) { - return ensureQueueFlushed_i(t, scratch, qi, loc, 0, in_anchored, 0); -} - static really_inline hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, s64a loc, @@ -318,73 +238,6 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, in_chained); } -static rose_inline -hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, - u32 qi, u32 top, u64a som, - u64a end, struct RoseContext *tctxt, - char in_anchored) { - DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); - - u8 *aa = getActiveLeafArray(t, tctxt->state); - struct hs_scratch *scratch = tctxtToScratch(tctxt); - const u32 aaCount = t->activeArrayCount; - const u32 qCount = t->queueCount; - struct mq *q = &scratch->queues[qi]; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - const struct NFA *nfa = getNfaByInfo(t, info); - - struct core_info *ci = &scratch->core_info; - s64a loc = (s64a)end - ci->buf_offset; - assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); - - if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, tctxt); - nfaQueueInitState(nfa, q); - pushQueueAt(q, 0, MQE_START, loc); - fatbit_set(scratch->aqa, qCount, qi); - } else if (info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - /* nfa only needs one top; we can go home now */ - return HWLM_CONTINUE_MATCHING; - } else if (!fatbit_set(scratch->aqa, qCount, qi)) { - initQueue(q, qi, t, tctxt); - loadStreamState(nfa, q, 0); - pushQueueAt(q, 0, MQE_START, 0); - } else if (isQueueFull(q)) { - DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); - if (info->eod) { - /* can catch up suffix independently no pq */ - q->context = NULL; - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else if (ensureQueueFlushed(t, scratch, qi, loc, in_anchored) - == 
HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - - assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); - pushQueueSom(q, top, loc, som); - - if (q_cur_loc(q) == (s64a)ci->len && !info->eod) { - /* we may not run the nfa; need to ensure state is fine */ - DEBUG_PRINTF("empty run\n"); - pushQueueNoMerge(q, MQE_END, loc); - char alive = nfaQueueExec(nfa, q, loc); - if (alive) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else { - mmbit_unset(aa, aaCount, qi); - fatbit_unset(scratch->aqa, qCount, qi); - } - } - - return HWLM_CONTINUE_MATCHING; -} - static rose_inline void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId, u64a end) { @@ -432,44 +285,6 @@ void recordAnchoredLiteralMatch(struct RoseContext *tctxt, u32 literal_id, mmbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); } -/* handles the firing of external matches */ -static rose_inline -hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, u8 *state, ReportID id, - u64a end, struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - assert(end == tctxt->minMatchOffset); - DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end); - updateLastMatchOffset(tctxt, end); - - int cb_rv = tctxt->cb(end, id, tctxt->userCtx); - if (cb_rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } - - if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { - if (!scratch->core_info.broken) { - scratch->core_info.broken = BROKEN_EXHAUSTED; - } - tctxt->groups = 0; - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; -} - hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, u64a end, struct RoseContext *tctxt, char in_anchored, char in_catchup) { @@ -556,906 +371,6 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, return HWLM_CONTINUE_MATCHING; } -/* catches up engines enough to ensure any earlier mpv triggers are enqueued - * and then adds the trigger to the mpv queue. Must not be called during catch - * up */ -static rose_inline -hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, - u8 *state, ReportID r, u64a end, - struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - if (roseCatchUpMpvFeeders(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0); -} - -static rose_inline -hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, u8 *state, u64a end, - struct RoseContext *tctxt, char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - // In SOM processing, we may be able to limit or entirely avoid catchup. 
- - DEBUG_PRINTF("entry\n"); - - if (end == tctxt->minMatchOffset) { - DEBUG_PRINTF("already caught up\n"); - return HWLM_CONTINUE_MATCHING; - } - - DEBUG_PRINTF("catching up all NFAs\n"); - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - updateMinMatchOffset(tctxt, end); - return HWLM_CONTINUE_MATCHING; -} - -static really_inline -hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, u8 *state, ReportID id, - u64a end, struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end, - tctxt->minMatchOffset); - - // Reach into reports and handle internal reports that just manipulate SOM - // slots ourselves, rather than going through the callback. - - if (roseSomCatchup(t, state, end, tctxt, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - const struct internal_report *ri = getInternalReport(t, id); - handleSomInternal(scratch, ri, end); - - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, u8 *state, - ReportID id, u64a start, u64a end, - struct RoseContext *tctxt, char in_anchored) { - if (roseCatchUpTo(t, state, end, tctxtToScratch(tctxt), in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id, - start, end); - DEBUG_PRINTF(" last match %llu\n", tctxt->lastMatchOffset); - assert(end == tctxt->minMatchOffset); - - updateLastMatchOffset(tctxt, end); - int cb_rv = tctxt->cb_som(start, end, id, tctxt->userCtx); - if (cb_rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } - - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - if (isAllExhausted(t, ci->exhaustionVector)) { - if (!ci->broken) { - ci->broken = BROKEN_EXHAUSTED; - } - tctxt->groups = 0; - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, u8 *state, ReportID id, - u64a start, u64a end, struct RoseContext *tctxt, - char in_anchored) { - DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n", - id, start, end, tctxt->minMatchOffset); - - // Reach into reports and handle internal reports that just manipulate SOM - // slots ourselves, rather than going through the callback. - - if (roseSomCatchup(t, state, end, tctxt, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - const struct internal_report *ri = getInternalReport(t, id); - setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end); - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -char rosePrefixCheckMiracles(const struct RoseEngine *t, - const struct LeftNfaInfo *left, - struct core_info *ci, struct mq *q, u64a end) { - if (left->transient) { - // Miracles won't help us with transient leftfix engines; they only - // scan for a limited time anyway. 
- return 1; - } - - if (!left->stopTable) { - return 1; - } - - DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex); - - const s64a begin_loc = q_cur_loc(q); - const s64a end_loc = end - ci->buf_offset; - - s64a miracle_loc; - if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) { - goto found_miracle; - } - - if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc, - &miracle_loc)) { - goto found_miracle; - } - - return 1; - -found_miracle: - DEBUG_PRINTF("miracle at %lld\n", miracle_loc); - assert(miracle_loc >= begin_loc); - - // If we're a prefix, then a miracle effectively results in us needing to - // re-init our state and start fresh. - if (!left->infix) { - if (miracle_loc != begin_loc) { - DEBUG_PRINTF("re-init prefix state\n"); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, miracle_loc); - pushQueueAt(q, 1, MQE_TOP, miracle_loc); - nfaQueueInitState(q->nfa, q); - } - return 1; - } - - // Otherwise, we're an infix. Remove tops before the miracle from the queue - // and re-init at that location. - - q_skip_forward_to(q, miracle_loc); - - if (q_last_type(q) == MQE_START) { - DEBUG_PRINTF("miracle caused infix to die\n"); - return 0; - } - - DEBUG_PRINTF("re-init infix state\n"); - assert(q->items[q->cur].type == MQE_START); - q->items[q->cur].location = miracle_loc; - nfaQueueInitState(q->nfa, q); - - return 1; -} - -static really_inline -char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, - ReportID leftfixReport, u64a end, - struct RoseContext *tctxt) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - struct core_info *ci = &scratch->core_info; - - u32 ri = queueToLeftIndex(t, qi); - const struct LeftNfaInfo *left = getLeftTable(t) + ri; - - DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", - (left->transient ? "transient" : "active"), - (left->infix ? "infix" : "prefix"), - ri, qi, leftfixLag, left->maxLag); - - assert(leftfixLag <= left->maxLag); - - struct mq *q = scratch->queues + qi; - u32 qCount = t->queueCount; - u32 arCount = t->activeLeftCount; - - if (!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, ri)) { - DEBUG_PRINTF("engine is dead nothing to see here\n"); - return 0; - } - - if (unlikely(end < leftfixLag)) { - assert(0); /* lag is the literal length */ - return 0; - } - - if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset - && !fatbit_isset(scratch->aqa, qCount, qi) - && isZombie(t, tctxt->state, left)) { - DEBUG_PRINTF("zombie\n"); - return 1; - } - - if (!fatbit_set(scratch->aqa, qCount, qi)) { - DEBUG_PRINTF("initing q %u\n", qi); - initRoseQueue(t, qi, left, tctxt); - if (ci->buf_offset) { // there have been writes before us! 
- s32 sp; - if (left->transient) { - sp = -(s32)ci->hlen; - } else { - sp = -(s32)loadRoseDelay(t, tctxt->state, left); - } - - /* transient nfas are always started fresh -> state not maintained - * at stream boundary */ - - pushQueueAt(q, 0, MQE_START, sp); - if (left->infix || (ci->buf_offset + sp > 0 && !left->transient)) { - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 1, MQE_TOP, sp); - nfaQueueInitState(q->nfa, q); - } - } else { // first write ever - pushQueueAt(q, 0, MQE_START, 0); - pushQueueAt(q, 1, MQE_TOP, 0); - nfaQueueInitState(q->nfa, q); - } - } - - s64a loc = (s64a)end - ci->buf_offset - leftfixLag; - assert(loc >= q_cur_loc(q)); - assert(leftfixReport != MO_INVALID_IDX); - - if (left->transient) { - s64a start_loc = loc - left->transient; - if (q_cur_loc(q) < start_loc) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, start_loc); - pushQueueAt(q, 1, MQE_TOP, start_loc); - nfaQueueInitState(q->nfa, q); - } - } - - if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { - if (left->infix) { - if (infixTooOld(q, loc)) { - DEBUG_PRINTF("infix %u died of old age\n", ri); - scratch->tctxt.groups &= left->squash_mask; - mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); - return 0; - } - - reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - } - - if (!rosePrefixCheckMiracles(t, left, ci, q, end)) { - DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); - scratch->tctxt.groups &= left->squash_mask; - mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); - return 0; - } - -#ifdef DEBUG - debugQueue(q); -#endif - - pushQueueNoMerge(q, MQE_END, loc); - - char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); - if (!rv) { /* nfa is dead */ - DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); - mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); - assert(!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, - ri)); - tctxt->groups &= left->squash_mask; - return 0; - } - - // Queue must have next start loc before we call nfaInAcceptState. 
- q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - - DEBUG_PRINTF("checking for report %u\n", leftfixReport); - DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); - return rv == MO_MATCHES_PENDING; - } else { - DEBUG_PRINTF("checking for report %u\n", leftfixReport); - char rv = nfaInAcceptState(q->nfa, leftfixReport, q); - DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); - return rv; - } -} - -static rose_inline -void roseSetRole(const struct RoseEngine *t, u8 *state, - struct RoseContext *tctxt, u32 stateIndex, u8 depth) { - DEBUG_PRINTF("state idx=%u, depth=%u\n", stateIndex, depth); - mmbit_set(getRoleState(state), t->rolesWithStateCount, stateIndex); - update_depth(tctxt, depth); -} - -static rose_inline -void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, - u32 topEvent, u8 cancel, struct RoseContext *tctxt) { - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - s64a loc = (s64a)end - ci->buf_offset; - - u32 ri = queueToLeftIndex(t, qi); - assert(topEvent < MQE_INVALID); - - const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); - assert(!left->transient); - - DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); - - struct mq *q = tctxtToScratch(tctxt)->queues + qi; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - - u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state); - const u32 arCount = t->activeLeftCount; - char alive = mmbit_set(activeLeftArray, arCount, ri); - - if (alive && info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - return; - } - - struct fatbit *aqa = tctxtToScratch(tctxt)->aqa; - const u32 qCount = t->queueCount; - - if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && - !fatbit_isset(aqa, qCount, qi) && isZombie(t, tctxt->state, left)) { - DEBUG_PRINTF("yawn - zombie\n"); - return; - } - - if (cancel) { - DEBUG_PRINTF("dominating top: (re)init\n"); - fatbit_set(aqa, qCount, qi); - initRoseQueue(t, qi, left, tctxt); - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (!fatbit_set(aqa, qCount, qi)) { - DEBUG_PRINTF("initing %u\n", qi); - initRoseQueue(t, qi, left, tctxt); - if (alive) { - s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left); - pushQueueAt(q, 0, MQE_START, sp); - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } - } else if (!alive) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (isQueueFull(q)) { - reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - - if (isQueueFull(q)) { - /* still full - reduceQueue did nothing */ - DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, - q->end - q->cur); - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } - } - - pushQueueSom(q, topEvent, loc, start); -} - -static really_inline -int reachHasBit(const u8 *reach, u8 c) { - return !!(reach[c / 8U] & (u8)1U << (c % 8U)); -} - -/** - * \brief Scan around a literal, checking that that "lookaround" reach masks - * are satisfied. 
- */ -static rose_inline -int roseCheckLookaround(const struct RoseEngine *t, u32 lookaroundIndex, - u32 lookaroundCount, u64a end, - struct RoseContext *tctxt) { - assert(lookaroundIndex != MO_INVALID_IDX); - assert(lookaroundCount > 0); - - const struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, - ci->buf_offset, ci->buf_offset + ci->len); - - const u8 *base = (const u8 *)t; - const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const s8 *look = look_base + lookaroundIndex; - const s8 *look_end = look + lookaroundCount; - assert(look < look_end); - - const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; - - // The following code assumes that the lookaround structures are ordered by - // increasing offset. - - const s64a base_offset = end - ci->buf_offset; - DEBUG_PRINTF("base_offset=%lld\n", base_offset); - DEBUG_PRINTF("first look has offset %d\n", *look); - - // If our first check tells us we need to look at an offset before the - // start of the stream, this role cannot match. - if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - // Skip over offsets that are before the history buffer. - do { - s64a offset = base_offset + *look; - if (offset >= -(s64a)ci->hlen) { - goto in_history; - } - DEBUG_PRINTF("look=%d before history\n", *look); - look++; - reach += REACH_BITVECTOR_LEN; - } while (look < look_end); - - // History buffer. - DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); - for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { - in_history: - ; - s64a offset = base_offset + *look; - DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); - - if (offset >= 0) { - DEBUG_PRINTF("in buffer\n"); - goto in_buffer; - } - - assert(offset >= -(s64a)ci->hlen && offset < 0); - u8 c = ci->hbuf[ci->hlen + offset]; - if (!reachHasBit(reach, c)) { - DEBUG_PRINTF("char 0x%02x failed reach check\n", c); - return 0; - } - } - - // Current buffer. - DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); - for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { - in_buffer: - ; - s64a offset = base_offset + *look; - DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); - - if (offset >= (s64a)ci->len) { - DEBUG_PRINTF("in the future\n"); - break; - } - - assert(offset >= 0 && offset < (s64a)ci->len); - u8 c = ci->buf[offset]; - if (!reachHasBit(reach, c)) { - DEBUG_PRINTF("char 0x%02x failed reach check\n", c); - return 0; - } - } - - DEBUG_PRINTF("OK :)\n"); - return 1; -} - -static -int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, - void *context) { - u64a *som = context; - *som = MIN(*som, from_offset); - return MO_CONTINUE_MATCHING; -} - -static rose_inline -u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, - UNUSED const u32 leftfixLag, - struct RoseContext *tctxt) { - u32 ri = queueToLeftIndex(t, qi); - - UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; - - DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", - left->transient ? 
"transient" : "active", ri, qi, - leftfixLag, left->maxLag); - - assert(leftfixLag <= left->maxLag); - - struct mq *q = tctxtToScratch(tctxt)->queues + qi; - - u64a start = ~0ULL; - - /* switch the callback + context for a fun one */ - q->som_cb = roseNfaEarliestSom; - q->context = &start; - - nfaReportCurrentMatches(q->nfa, q); - - /* restore the old callback + context */ - q->som_cb = roseNfaSomAdaptor; - q->context = NULL; - DEBUG_PRINTF("earliest som is %llu\n", start); - return start; -} - -static rose_inline -char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { - assert(max_bound <= ROSE_BOUND_INF); - assert(min_bound <= max_bound); - - if (end < min_bound) { - return 0; - } - return max_bound == ROSE_BOUND_INF || end <= max_bound; -} - -#define PROGRAM_CASE(name) \ - case ROSE_INSTR_##name: { \ - DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \ - const struct ROSE_STRUCT_##name *ri = \ - (const struct ROSE_STRUCT_##name *)pc; - -#define PROGRAM_NEXT_INSTRUCTION \ - pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ - break; \ - } - -static really_inline -hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, - u64a end, u64a *som, struct RoseContext *tctxt, - char in_anchored, int *work_done) { - DEBUG_PRINTF("program begins at offset %u\n", programOffset); - - assert(programOffset); - assert(programOffset < t->size); - - const char *pc = getByOffset(t, programOffset); - - assert(*(const u8 *)pc != ROSE_INSTR_END); - - for (;;) { - assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); - u8 code = *(const u8 *)pc; - assert(code <= ROSE_INSTR_END); - - switch ((enum RoseInstructionCode)code) { - PROGRAM_CASE(ANCHORED_DELAY) { - if (in_anchored && end > t->floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("delay until playback\n"); - update_depth(tctxt, ri->depth); - tctxt->groups |= ri->groups; - *work_done = 1; - assert(ri->done_jump); // must progress - pc += ri->done_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_ONLY_EOD) { - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - if (end != ci->buf_offset + ci->len) { - DEBUG_PRINTF("should only match at end of data\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_BOUNDS) { - if (!in_anchored && - !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) { - DEBUG_PRINTF("failed root bounds check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_NOT_HANDLED) { - struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles; - if (fatbit_set(handled, t->handledKeyCount, ri->key)) { - DEBUG_PRINTF("key %u already set\n", ri->key); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LOOKAROUND) { - if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) { - DEBUG_PRINTF("failed lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LEFTFIX) { - if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end, - tctxt)) { - DEBUG_PRINTF("failed lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ADJUST) { - assert(ri->distance <= end); - *som = end - ri->distance; - DEBUG_PRINTF("som is 
(end - %u) = %llu\n", ri->distance, *som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_LEFTFIX) { - *som = roseGetHaigSom(t, ri->queue, ri->lag, tctxt); - DEBUG_PRINTF("som from leftfix is %llu\n", *som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_INFIX) { - roseTriggerInfix(t, *som, end, ri->queue, ri->event, ri->cancel, - tctxt); - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_SUFFIX) { - if (roseHandleSuffixTrigger(t, ri->queue, ri->event, *som, end, - tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT) { - if (roseHandleMatch(t, tctxt->state, ri->report, end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_CHAIN) { - if (roseCatchUpAndHandleChainMatch(t, tctxt->state, ri->report, - end, tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_EOD) { - if (tctxt->cb(end, ri->report, tctxt->userCtx) == - MO_HALT_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_INT) { - if (roseHandleSom(t, tctxt->state, ri->report, end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM) { - if (roseHandleSomSom(t, tctxt->state, ri->report, *som, end, - tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_KNOWN) { - if (roseHandleSomMatch(t, tctxt->state, ri->report, *som, end, - tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_STATE) { - roseSetRole(t, tctxt->state, tctxt, ri->index, ri->depth); - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_GROUPS) { - tctxt->groups |= ri->groups; - DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups, - tctxt->groups); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(END) { - DEBUG_PRINTF("finished\n"); - return HWLM_CONTINUE_MATCHING; - } - PROGRAM_NEXT_INSTRUCTION - } - } - - assert(0); // unreachable - return HWLM_CONTINUE_MATCHING; -} - -#undef PROGRAM_CASE -#undef PROGRAM_NEXT_INSTRUCTION - -hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset, - u64a end, u64a *som, struct RoseContext *tctxt, - int *work_done) { - return roseRunRoleProgram_i(t, programOffset, end, som, tctxt, 0, - work_done); -} - -static really_inline -void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) { - assert(tl->squashesGroup); - - // we should be squashing a single group - assert(popcount64(tl->groups) == 1); - - DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n", - ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups); - - tctxt->groups &= ~tl->groups; -} - -// Run the sparse iterator for this literal and use that to discover which -// roles to consider. 
-/* Note: uses the stashed sparse iter state; cannot be called from - * anybody else who is using it */ -/* Note: uses the handled role mmbit; cannot be called from - * anybody else who is using it (nobody else should be) */ -/* non-root roles should not occur in any anchored context */ -static really_inline -hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - /* assert(!tctxt->in_anchored); */ - /* assert(!tctxt->in_anch_playback); */ - const u32 *iterProgram = getByOffset(t, tl->iterProgramOffset); - const struct mmbit_sparse_iter *it = getByOffset(t, tl->iterOffset); - assert(ISALIGNED(iterProgram)); - assert(ISALIGNED(it)); - - // Sparse iterator state was allocated earlier - struct mmbit_sparse_state *s = tctxtToScratch(tctxt)->sparse_iter_state; - struct fatbit *handled_roles = tctxtToScratch(tctxt)->handled_roles; - - const u32 numStates = t->rolesWithStateCount; - - void *role_state = getRoleState(tctxt->state); - u32 idx = 0; - int work_done = 0; // set to 1 if we actually process any roles - u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s); - - fatbit_clear(handled_roles); - - for (; i != MMB_INVALID; - i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { - u32 programOffset = iterProgram[idx]; - DEBUG_PRINTF("pred state %u (iter idx=%u) is on -> program %u\n", i, - idx, programOffset); - - // If this bit is switched on in the sparse iterator, it must be - // driving a program. - assert(programOffset); - - u64a som = 0ULL; - if (roseRunRoleProgram_i(t, programOffset, end, &som, tctxt, 0, - &work_done) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - - // If we've actually handled any roles, we might need to apply this - // literal's squash mask to our groups as well. - if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - - return HWLM_CONTINUE_MATCHING; -} - -// Walk the set of root roles (roles with depth 1) associated with this literal -// and set them on. -static really_inline -char roseWalkRootRoles_i(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt, char in_anchored) { - if (!tl->rootProgramOffset) { - return 1; - } - - DEBUG_PRINTF("running literal root program at %u\n", tl->rootProgramOffset); - - u64a som = 0; - int work_done = 0; - - if (roseRunRoleProgram_i(t, tl->rootProgramOffset, end, &som, tctxt, - in_anchored, - &work_done) == HWLM_TERMINATE_MATCHING) { - return 0; - } - - // If we've actually handled any roles, we might need to apply this - // literal's squash mask to our groups as well. 
- if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - - return 1; -} - -static never_inline -char roseWalkRootRoles_A(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - return roseWalkRootRoles_i(t, tl, end, tctxt, 1); -} - -static never_inline -char roseWalkRootRoles_N(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - return roseWalkRootRoles_i(t, tl, end, tctxt, 0); -} - -static really_inline -char roseWalkRootRoles(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt, char in_anchored, - char in_anch_playback) { - assert(!in_anch_playback || tl->rootProgramOffset); - if (!in_anch_playback && !tl->rootProgramOffset) { - return 1; - } - - if (in_anchored) { - return roseWalkRootRoles_A(t, tl, end, tctxt); - } else { - return roseWalkRootRoles_N(t, tl, end, tctxt); - } -} - /* handles catchup, som, cb, etc */ static really_inline hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, u8 *state, @@ -1553,31 +468,33 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - assert(tl->rootProgramOffset); + assert(tl->programOffset); assert(!tl->delay_mask); - DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx\n", id, - tl->minDepth, tl->groups); + DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups); if (real_end <= t->floatingMinLiteralMatchOffset) { roseFlushLastByteHistory(t, state, real_end, tctxt); tctxt->lastEndOffset = real_end; } - /* anchored literals are root only */ - if (!roseWalkRootRoles(t, tl, real_end, tctxt, 1, 0)) { - rv = HWLM_TERMINATE_MATCHING; - } - - DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth, - tctxt->groups); - - if (rv == HWLM_TERMINATE_MATCHING) { + int work_done = 0; + if (roseRunProgram(t, tl->programOffset, real_end, tctxt, 1, &work_done) == + HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(tctxtToScratch(tctxt))); DEBUG_PRINTF("caller requested termination\n"); return MO_HALT_MATCHING; } + // If we've actually handled any roles, we might need to apply this + // literal's squash mask to our groups as well. + if (work_done && tl->squashesGroup) { + roseSquashGroup(tctxt, tl); + } + + DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth, + tctxt->groups); + if (real_end > t->floatingMinLiteralMatchOffset) { recordAnchoredLiteralMatch(tctxt, id, real_end); } @@ -1623,8 +540,7 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - DEBUG_PRINTF("lit id=%u, minDepth=%u, groups=0x%016llx\n", id, tl->minDepth, - tl->groups); + DEBUG_PRINTF("lit id=%u, groups=0x%016llx\n", id, tl->groups); if (do_group_check && !(tl->groups & tctxt->groups)) { DEBUG_PRINTF("IGNORE: none of this literal's groups are set.\n"); @@ -1643,28 +559,21 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, return HWLM_CONTINUE_MATCHING; } - if (tl->minDepth > tctxt->depth) { - DEBUG_PRINTF("IGNORE: minDepth=%u > %u\n", tl->minDepth, tctxt->depth); - goto root_roles; - } - - /* the depth checks will normally prevent roles without a spare iterator - * from reaching here (root roles) (and only root roles should be seen - * during anch play back). 
*/ - assert(tl->iterOffset == ROSE_OFFSET_INVALID || !in_anch_playback); - if (tl->iterOffset != ROSE_OFFSET_INVALID && !in_anch_playback) { - hwlmcb_rv_t rv = roseWalkSparseIterator(t, tl, end, tctxt); + int work_done = 0; - if (rv == HWLM_TERMINATE_MATCHING) { + if (tl->programOffset) { + DEBUG_PRINTF("running program at %u\n", tl->programOffset); + if (roseRunProgram(t, tl->programOffset, end, tctxt, 0, &work_done) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } + } -root_roles: - // Process "root roles", i.e. depth 1 roles for this literal - if (!roseWalkRootRoles(t, tl, end, tctxt, 0 /* in_anchored */, - in_anch_playback)) { - return HWLM_TERMINATE_MATCHING; + // If we've actually handled any roles, we might need to apply this + // literal's squash mask to our groups as well. + if (work_done && tl->squashesGroup) { + roseSquashGroup(tctxt, tl); } return HWLM_CONTINUE_MATCHING; @@ -1966,31 +875,3 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { DEBUG_PRINTF("user requested halt\n"); return HWLM_TERMINATE_MATCHING; } - -// Specialised cut-down roseCallback for running ROSE_EVENT "literals", like the -// EOD one. -void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt) { - const struct RoseEngine *t = tctxt->t; - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - u64a real_end = ci->buf_offset - ci->hlen + end; - - DEBUG_PRINTF("EVENT id=%u offset=%llu\n", id, real_end); - - // Caller should guard against broken stream. - assert(!can_stop_matching(tctxtToScratch(tctxt))); - - // Shouldn't be here if we're a real literal with benefits. - assert(id >= t->nonbenefits_base_id); - - // At the moment, this path is only used for the EOD event. - assert(id == t->eodLiteralId); - - // There should be no pending delayed literals. - assert(!tctxt->filledDelayedSlots); - - // Note: we throw away the return value. - roseProcessMatch_i(t, real_end, id, tctxt, 0, 0, 0); - - DEBUG_PRINTF("DONE depth=%hhu, groups=0x%016llx\n", tctxt->depth, - tctxt->groups); -} diff --git a/src/rose/match.h b/src/rose/match.h index 6bcf781e7..86e221831 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -55,7 +55,6 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctx); hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, void *ctx); int roseAnchoredCallback(u64a end, u32 id, void *ctx); -void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt); /* Common code, used all over Rose runtime */ @@ -299,8 +298,4 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, scratch->sparse_iter_state); } -hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset, - u64a end, u64a *som, struct RoseContext *tctxt, - int *work_done); - #endif diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h new file mode 100644 index 000000000..08dbff1f0 --- /dev/null +++ b/src/rose/program_runtime.h @@ -0,0 +1,1081 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef PROGRAM_RUNTIME_H +#define PROGRAM_RUNTIME_H + +#include "catchup.h" +#include "counting_miracle.h" +#include "infix.h" +#include "match.h" +#include "miracle.h" +#include "rose.h" +#include "rose_internal.h" +#include "rose_program.h" +#include "rose_types.h" +#include "runtime.h" +#include "scratch.h" +#include "ue2common.h" +#include "util/fatbit.h" +#include "util/multibit.h" + +static rose_inline +char rosePrefixCheckMiracles(const struct RoseEngine *t, + const struct LeftNfaInfo *left, + struct core_info *ci, struct mq *q, u64a end) { + if (left->transient) { + // Miracles won't help us with transient leftfix engines; they only + // scan for a limited time anyway. + return 1; + } + + if (!left->stopTable) { + return 1; + } + + DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex); + + const s64a begin_loc = q_cur_loc(q); + const s64a end_loc = end - ci->buf_offset; + + s64a miracle_loc; + if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) { + goto found_miracle; + } + + if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc, + &miracle_loc)) { + goto found_miracle; + } + + return 1; + +found_miracle: + DEBUG_PRINTF("miracle at %lld\n", miracle_loc); + assert(miracle_loc >= begin_loc); + + // If we're a prefix, then a miracle effectively results in us needing to + // re-init our state and start fresh. + if (!left->infix) { + if (miracle_loc != begin_loc) { + DEBUG_PRINTF("re-init prefix state\n"); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, miracle_loc); + pushQueueAt(q, 1, MQE_TOP, miracle_loc); + nfaQueueInitState(q->nfa, q); + } + return 1; + } + + // Otherwise, we're an infix. Remove tops before the miracle from the queue + // and re-init at that location. 
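/*
 * A "miracle" is a location where the input is known to have killed
 * every state of the leftfix, so any queued history before it is
 * irrelevant. The simplest form scans backwards for the last byte in
 * the engine's stop set; this sketch uses a single stop character
 * rather than the stop-table and counting variants that the real
 * roseMiracleOccurs/roseCountingMiracleOccurs implement.
 */
#include <stddef.h>
#include <stdio.h>

/* Returns the offset just after the last 'stop' byte in buf[begin, end),
 * or -1 if no miracle occurs in the window. */
static ptrdiff_t find_miracle(const char *buf, size_t begin, size_t end,
                              char stop) {
    for (size_t i = end; i > begin; i--) {
        if (buf[i - 1] == stop) {
            return (ptrdiff_t)i; /* engine must be re-seeded from here */
        }
    }
    return -1;
}

int main(void) {
    const char *buf = "aaaaXaaa";
    ptrdiff_t loc = find_miracle(buf, 0, 8, 'X');
    if (loc >= 0) {
        printf("miracle at %td: drop earlier tops, re-init here\n", loc);
    }
    return 0;
}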
+ + q_skip_forward_to(q, miracle_loc); + + if (q_last_type(q) == MQE_START) { + DEBUG_PRINTF("miracle caused infix to die\n"); + return 0; + } + + DEBUG_PRINTF("re-init infix state\n"); + assert(q->items[q->cur].type == MQE_START); + q->items[q->cur].location = miracle_loc; + nfaQueueInitState(q->nfa, q); + + return 1; +} + +static really_inline +hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, s64a loc, + char is_mpv, char in_anchored, + char in_catchup) { + struct RoseContext *tctxt = &scratch->tctxt; + u8 *aa = getActiveLeafArray(t, tctxt->state); + struct fatbit *activeQueues = scratch->aqa; + u32 aaCount = t->activeArrayCount; + u32 qCount = t->queueCount; + + struct mq *q = &scratch->queues[qi]; + DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n", + q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset); + if (q_cur_loc(q) == loc) { + /* too many tops enqueued at the one spot; need to flatten this queue. + * We can use the full catchups as it will short circuit as we are + * already at this location. It also saves waking everybody up */ + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExec(q->nfa, q, loc); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else if (!in_catchup) { + if (is_mpv) { + tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ + if (loc + scratch->core_info.buf_offset + <= tctxt->minNonMpvMatchOffset) { + DEBUG_PRINTF("flushing chained\n"); + if (roseCatchUpMPV(t, tctxt->state, loc, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + goto done_queue_empty; + } + } + + if (roseCatchUpTo(t, tctxt->state, loc + scratch->core_info.buf_offset, + scratch, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } else { + /* we must be a chained nfa */ + assert(is_mpv); + DEBUG_PRINTF("flushing chained\n"); + tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ + if (roseCatchUpMPV(t, tctxt->state, loc, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } +done_queue_empty: + if (!mmbit_set(aa, aaCount, qi)) { + initQueue(q, qi, t, tctxt); + nfaQueueInitState(q->nfa, q); + pushQueueAt(q, 0, MQE_START, loc); + fatbit_set(activeQueues, qCount, qi); + } + + assert(!isQueueFull(q)); + + if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { + if (!scratch->core_info.broken) { + scratch->core_info.broken = BROKEN_EXHAUSTED; + } + tctxt->groups = 0; + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, s64a loc, + char in_anchored) { + return ensureQueueFlushed_i(t, scratch, qi, loc, 0, in_anchored, 0); +} + +static rose_inline +hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, + u32 qi, u32 top, u64a som, + u64a end, struct RoseContext *tctxt, + char in_anchored) { + DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); + + u8 *aa = getActiveLeafArray(t, tctxt->state); + struct hs_scratch *scratch = tctxtToScratch(tctxt); + const u32 aaCount = t->activeArrayCount; + const u32 qCount = t->queueCount; + struct mq *q = &scratch->queues[qi]; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + const struct NFA *nfa = getNfaByInfo(t, info); + + struct core_info *ci = &scratch->core_info; + s64a loc = (s64a)end - ci->buf_offset; + assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); + + if 
(!mmbit_set(aa, aaCount, qi)) { + initQueue(q, qi, t, tctxt); + nfaQueueInitState(nfa, q); + pushQueueAt(q, 0, MQE_START, loc); + fatbit_set(scratch->aqa, qCount, qi); + } else if (info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + /* nfa only needs one top; we can go home now */ + return HWLM_CONTINUE_MATCHING; + } else if (!fatbit_set(scratch->aqa, qCount, qi)) { + initQueue(q, qi, t, tctxt); + loadStreamState(nfa, q, 0); + pushQueueAt(q, 0, MQE_START, 0); + } else if (isQueueFull(q)) { + DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); + if (info->eod) { + /* can catch up suffix independently no pq */ + q->context = NULL; + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else if (ensureQueueFlushed(t, scratch, qi, loc, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + + assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); + pushQueueSom(q, top, loc, som); + + if (q_cur_loc(q) == (s64a)ci->len && !info->eod) { + /* we may not run the nfa; need to ensure state is fine */ + DEBUG_PRINTF("empty run\n"); + pushQueueNoMerge(q, MQE_END, loc); + char alive = nfaQueueExec(nfa, q, loc); + if (alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else { + mmbit_unset(aa, aaCount, qi); + fatbit_unset(scratch->aqa, qCount, qi); + } + } + + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, + ReportID leftfixReport, u64a end, + struct RoseContext *tctxt) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct core_info *ci = &scratch->core_info; + + u32 ri = queueToLeftIndex(t, qi); + const struct LeftNfaInfo *left = getLeftTable(t) + ri; + + DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", + (left->transient ? "transient" : "active"), + (left->infix ? "infix" : "prefix"), + ri, qi, leftfixLag, left->maxLag); + + assert(leftfixLag <= left->maxLag); + + struct mq *q = scratch->queues + qi; + u32 qCount = t->queueCount; + u32 arCount = t->activeLeftCount; + + if (!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, ri)) { + DEBUG_PRINTF("engine is dead nothing to see here\n"); + return 0; + } + + if (unlikely(end < leftfixLag)) { + assert(0); /* lag is the literal length */ + return 0; + } + + if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset + && !fatbit_isset(scratch->aqa, qCount, qi) + && isZombie(t, tctxt->state, left)) { + DEBUG_PRINTF("zombie\n"); + return 1; + } + + if (!fatbit_set(scratch->aqa, qCount, qi)) { + DEBUG_PRINTF("initing q %u\n", qi); + initRoseQueue(t, qi, left, tctxt); + if (ci->buf_offset) { // there have been writes before us! 
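/*
 * The branching above (and in roseHandleSuffixTrigger earlier) keys off
 * two activity bits per engine: a persistent "engine is live" bit
 * (mmbit, kept in stream state) and a per-scan "queue already
 * initialised" bit (fatbit, kept in scratch). Their combinations select
 * between a fresh init, reloading saved stream state, and doing
 * nothing. A sketch with plain flags; engine_slot and touch_engine are
 * illustrative stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

struct engine_slot {
    bool live;        /* persists across stream writes */
    bool q_this_scan; /* cleared at the start of every scan */
};

static void touch_engine(struct engine_slot *e) {
    if (!e->live) {
        e->live = true;
        e->q_this_scan = true;
        printf("fresh init: new queue, initial NFA state\n");
    } else if (!e->q_this_scan) {
        e->q_this_scan = true;
        printf("live from an earlier write: reload saved stream state\n");
    } else {
        printf("already set up in this scan: nothing to do\n");
    }
}

int main(void) {
    struct engine_slot e = { false, false };
    touch_engine(&e);      /* fresh init */
    touch_engine(&e);      /* already set up */
    e.q_this_scan = false; /* a new stream write begins */
    touch_engine(&e);      /* reload stream state */
    return 0;
}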
+ s32 sp; + if (left->transient) { + sp = -(s32)ci->hlen; + } else { + sp = -(s32)loadRoseDelay(t, tctxt->state, left); + } + + /* transient nfas are always started fresh -> state not maintained + * at stream boundary */ + + pushQueueAt(q, 0, MQE_START, sp); + if (left->infix || (ci->buf_offset + sp > 0 && !left->transient)) { + loadStreamState(q->nfa, q, sp); + } else { + pushQueueAt(q, 1, MQE_TOP, sp); + nfaQueueInitState(q->nfa, q); + } + } else { // first write ever + pushQueueAt(q, 0, MQE_START, 0); + pushQueueAt(q, 1, MQE_TOP, 0); + nfaQueueInitState(q->nfa, q); + } + } + + s64a loc = (s64a)end - ci->buf_offset - leftfixLag; + assert(loc >= q_cur_loc(q)); + assert(leftfixReport != MO_INVALID_IDX); + + if (left->transient) { + s64a start_loc = loc - left->transient; + if (q_cur_loc(q) < start_loc) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, start_loc); + pushQueueAt(q, 1, MQE_TOP, start_loc); + nfaQueueInitState(q->nfa, q); + } + } + + if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { + if (left->infix) { + if (infixTooOld(q, loc)) { + DEBUG_PRINTF("infix %u died of old age\n", ri); + scratch->tctxt.groups &= left->squash_mask; + mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); + return 0; + } + + reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + } + + if (!rosePrefixCheckMiracles(t, left, ci, q, end)) { + DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); + scratch->tctxt.groups &= left->squash_mask; + mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); + return 0; + } + +#ifdef DEBUG + debugQueue(q); +#endif + + pushQueueNoMerge(q, MQE_END, loc); + + char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); + if (!rv) { /* nfa is dead */ + DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); + mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); + assert(!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, + ri)); + tctxt->groups &= left->squash_mask; + return 0; + } + + // Queue must have next start loc before we call nfaInAcceptState. 
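/*
 * The queue discipline relied on just below: an execution run consumes
 * [START, tops..., END], and afterwards the queue must be re-seeded
 * with a fresh START at the stop location before anything else (such
 * as an accept-state probe) inspects it. A toy version of that
 * lifecycle; the types here are illustrative, not the engine's mq.
 */
#include <assert.h>
#include <stdio.h>

enum ev_type { EV_START, EV_TOP, EV_END };
struct event { enum ev_type type; long loc; };

struct toy_queue {
    struct event items[16];
    unsigned cur, end;
};

static void push(struct toy_queue *q, enum ev_type t, long loc) {
    q->items[q->end++] = (struct event){ t, loc };
}

static void exec_to(struct toy_queue *q, long loc) {
    push(q, EV_END, loc);
    /* ... run the automaton over items[cur..end) here ... */
    q->cur = q->end = 0;
    push(q, EV_START, loc); /* re-seed: the next run starts where we stopped */
}

int main(void) {
    struct toy_queue q = { .cur = 0, .end = 0 };
    push(&q, EV_START, 0);
    push(&q, EV_TOP, 5);
    exec_to(&q, 10);
    assert(q.items[q.cur].type == EV_START && q.items[q.cur].loc == 10);
    printf("queue re-seeded at %ld\n", q.items[q.cur].loc);
    return 0;
}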
+ q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + + DEBUG_PRINTF("checking for report %u\n", leftfixReport); + DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); + return rv == MO_MATCHES_PENDING; + } else { + DEBUG_PRINTF("checking for report %u\n", leftfixReport); + char rv = nfaInAcceptState(q->nfa, leftfixReport, q); + DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); + return rv; + } +} + +static rose_inline +void roseSetRole(const struct RoseEngine *t, u8 *state, + struct RoseContext *tctxt, u32 stateIndex, u8 depth) { + DEBUG_PRINTF("state idx=%u, depth=%u\n", stateIndex, depth); + mmbit_set(getRoleState(state), t->rolesWithStateCount, stateIndex); + update_depth(tctxt, depth); +} + +static rose_inline +void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, + u32 topEvent, u8 cancel, struct RoseContext *tctxt) { + struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + s64a loc = (s64a)end - ci->buf_offset; + + u32 ri = queueToLeftIndex(t, qi); + assert(topEvent < MQE_INVALID); + + const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); + assert(!left->transient); + + DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); + + struct mq *q = tctxtToScratch(tctxt)->queues + qi; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + + u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state); + const u32 arCount = t->activeLeftCount; + char alive = mmbit_set(activeLeftArray, arCount, ri); + + if (alive && info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + return; + } + + struct fatbit *aqa = tctxtToScratch(tctxt)->aqa; + const u32 qCount = t->queueCount; + + if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && + !fatbit_isset(aqa, qCount, qi) && isZombie(t, tctxt->state, left)) { + DEBUG_PRINTF("yawn - zombie\n"); + return; + } + + if (cancel) { + DEBUG_PRINTF("dominating top: (re)init\n"); + fatbit_set(aqa, qCount, qi); + initRoseQueue(t, qi, left, tctxt); + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (!fatbit_set(aqa, qCount, qi)) { + DEBUG_PRINTF("initing %u\n", qi); + initRoseQueue(t, qi, left, tctxt); + if (alive) { + s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left); + pushQueueAt(q, 0, MQE_START, sp); + loadStreamState(q->nfa, q, sp); + } else { + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } + } else if (!alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (isQueueFull(q)) { + reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + + if (isQueueFull(q)) { + /* still full - reduceQueue did nothing */ + DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, + q->end - q->cur); + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); + + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } + } + + pushQueueSom(q, topEvent, loc, start); +} + +/* handles the firing of external matches */ +static rose_inline +hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, u8 *state, ReportID id, + u64a end, struct RoseContext *tctxt, + char in_anchored) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + if (roseCatchUpTo(t, state, end, scratch, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + + assert(end == tctxt->minMatchOffset); + DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end); + updateLastMatchOffset(tctxt, end); + + int cb_rv = tctxt->cb(end, id, tctxt->userCtx); + if 
(cb_rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + return HWLM_CONTINUE_MATCHING; + } + + if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { + if (!scratch->core_info.broken) { + scratch->core_info.broken = BROKEN_EXHAUSTED; + } + tctxt->groups = 0; + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + return HWLM_CONTINUE_MATCHING; +} + +/* catches up engines enough to ensure any earlier mpv triggers are enqueued + * and then adds the trigger to the mpv queue. Must not be called during catch + * up */ +static rose_inline +hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, + u8 *state, ReportID r, u64a end, + struct RoseContext *tctxt, + char in_anchored) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + if (roseCatchUpMpvFeeders(t, state, end, scratch, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + + return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0); +} + +static rose_inline +hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, u8 *state, u64a end, + struct RoseContext *tctxt, char in_anchored) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + // In SOM processing, we may be able to limit or entirely avoid catchup. + + DEBUG_PRINTF("entry\n"); + + if (end == tctxt->minMatchOffset) { + DEBUG_PRINTF("already caught up\n"); + return HWLM_CONTINUE_MATCHING; + } + + DEBUG_PRINTF("catching up all NFAs\n"); + if (roseCatchUpTo(t, state, end, scratch, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + updateMinMatchOffset(tctxt, end); + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, u8 *state, ReportID id, + u64a end, struct RoseContext *tctxt, + char in_anchored) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end, + tctxt->minMatchOffset); + + // Reach into reports and handle internal reports that just manipulate SOM + // slots ourselves, rather than going through the callback. 
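/*
 * An internal SOM report does not fire the user callback; it only
 * updates a start-of-match slot. The characteristic slot operation
 * keeps the earliest candidate start seen so far, the same MIN update
 * that roseNfaEarliestSom performs further down. A sketch; SOM_UNSET
 * and som_set_if_earlier are illustrative stand-ins.
 */
#include <stdint.h>
#include <stdio.h>

#define SOM_UNSET UINT64_MAX

static void som_set_if_earlier(uint64_t *slot, uint64_t from_offset) {
    if (from_offset < *slot) {
        *slot = from_offset; /* keep the earliest start of match */
    }
}

int main(void) {
    uint64_t slot = SOM_UNSET;
    som_set_if_earlier(&slot, 40);
    som_set_if_earlier(&slot, 25);
    som_set_if_earlier(&slot, 90); /* later start: ignored */
    printf("earliest som is %llu\n", (unsigned long long)slot);
    return 0;
}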
+
+    if (roseSomCatchup(t, state, end, tctxt, in_anchored)
+        == HWLM_TERMINATE_MATCHING) {
+        return HWLM_TERMINATE_MATCHING;
+    }
+
+    const struct internal_report *ri = getInternalReport(t, id);
+    handleSomInternal(scratch, ri, end);
+
+    return HWLM_CONTINUE_MATCHING;
+}
+
+static rose_inline
+hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, u8 *state,
+                               ReportID id, u64a start, u64a end,
+                               struct RoseContext *tctxt, char in_anchored) {
+    if (roseCatchUpTo(t, state, end, tctxtToScratch(tctxt), in_anchored)
+        == HWLM_TERMINATE_MATCHING) {
+        return HWLM_TERMINATE_MATCHING;
+    }
+
+    DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id,
+                 start, end);
+    DEBUG_PRINTF("    last match %llu\n", tctxt->lastMatchOffset);
+    assert(end == tctxt->minMatchOffset);
+
+    updateLastMatchOffset(tctxt, end);
+    int cb_rv = tctxt->cb_som(start, end, id, tctxt->userCtx);
+    if (cb_rv == MO_HALT_MATCHING) {
+        DEBUG_PRINTF("termination requested\n");
+        return HWLM_TERMINATE_MATCHING;
+    }
+
+    if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+        return HWLM_CONTINUE_MATCHING;
+    }
+
+    struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
+    if (isAllExhausted(t, ci->exhaustionVector)) {
+        if (!ci->broken) {
+            ci->broken = BROKEN_EXHAUSTED;
+        }
+        tctxt->groups = 0;
+        DEBUG_PRINTF("termination requested\n");
+        return HWLM_TERMINATE_MATCHING;
+    }
+
+    return HWLM_CONTINUE_MATCHING;
+}
+
+static rose_inline
+hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, u8 *state, ReportID id,
+                             u64a start, u64a end, struct RoseContext *tctxt,
+                             char in_anchored) {
+    DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n",
+                 id, start, end, tctxt->minMatchOffset);
+
+    // Reach into reports and handle internal reports that just manipulate SOM
+    // slots ourselves, rather than going through the callback.
+
+    if (roseSomCatchup(t, state, end, tctxt, in_anchored)
+        == HWLM_TERMINATE_MATCHING) {
+        return HWLM_TERMINATE_MATCHING;
+    }
+
+    const struct internal_report *ri = getInternalReport(t, id);
+    setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end);
+    return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
+int reachHasBit(const u8 *reach, u8 c) {
+    return !!(reach[c / 8U] & (u8)1U << (c % 8U));
+}
+
+/**
+ * \brief Scan around a literal, checking that the "lookaround" reach masks
+ * are satisfied.
+ */
+static rose_inline
+int roseCheckLookaround(const struct RoseEngine *t, u32 lookaroundIndex,
+                        u32 lookaroundCount, u64a end,
+                        struct RoseContext *tctxt) {
+    assert(lookaroundIndex != MO_INVALID_IDX);
+    assert(lookaroundCount > 0);
+
+    const struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
+    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
+                 ci->buf_offset, ci->buf_offset + ci->len);
+
+    const u8 *base = (const u8 *)t;
+    const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
+    const s8 *look = look_base + lookaroundIndex;
+    const s8 *look_end = look + lookaroundCount;
+    assert(look < look_end);
+
+    const u8 *reach_base = base + t->lookaroundReachOffset;
+    const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN;
+
+    // The following code assumes that the lookaround structures are ordered by
+    // increasing offset.
+
+    const s64a base_offset = end - ci->buf_offset;
+    DEBUG_PRINTF("base_offset=%lld\n", base_offset);
+    DEBUG_PRINTF("first look has offset %d\n", *look);
+
+    // If our first check tells us we need to look at an offset before the
+    // start of the stream, this role cannot match.
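/*
 * Each lookaround entry is an (offset, reach) pair, where reach is a
 * 256-bit bitvector (32 bytes) with one bit per byte value, tested
 * exactly as reachHasBit does above. Building and probing such a mask
 * for the class [a-z]; REACH_LEN mirrors REACH_BITVECTOR_LEN but is a
 * local stand-in.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define REACH_LEN 32 /* 256 bits, one per byte value */

static void reach_set(uint8_t *reach, uint8_t c) {
    reach[c / 8] |= (uint8_t)(1U << (c % 8));
}

static int reach_has(const uint8_t *reach, uint8_t c) {
    return !!(reach[c / 8] & (uint8_t)(1U << (c % 8)));
}

int main(void) {
    uint8_t reach[REACH_LEN];
    memset(reach, 0, sizeof(reach));
    for (uint8_t c = 'a'; c <= 'z'; c++) {
        reach_set(reach, c);
    }
    assert(reach_has(reach, 'q'));
    assert(!reach_has(reach, 'Q'));
    assert(!reach_has(reach, ' '));
    return 0;
}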
+ if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + // Skip over offsets that are before the history buffer. + do { + s64a offset = base_offset + *look; + if (offset >= -(s64a)ci->hlen) { + goto in_history; + } + DEBUG_PRINTF("look=%d before history\n", *look); + look++; + reach += REACH_BITVECTOR_LEN; + } while (look < look_end); + + // History buffer. + DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); + for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { + in_history: + ; + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= 0) { + DEBUG_PRINTF("in buffer\n"); + goto in_buffer; + } + + assert(offset >= -(s64a)ci->hlen && offset < 0); + u8 c = ci->hbuf[ci->hlen + offset]; + if (!reachHasBit(reach, c)) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + // Current buffer. + DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); + for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { + in_buffer: + ; + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("in the future\n"); + break; + } + + assert(offset >= 0 && offset < (s64a)ci->len); + u8 c = ci->buf[offset]; + if (!reachHasBit(reach, c)) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +static +int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, + void *context) { + u64a *som = context; + *som = MIN(*som, from_offset); + return MO_CONTINUE_MATCHING; +} + +static rose_inline +u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, + UNUSED const u32 leftfixLag, + struct RoseContext *tctxt) { + u32 ri = queueToLeftIndex(t, qi); + + UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; + + DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", + left->transient ? 
"transient" : "active", ri, qi, + leftfixLag, left->maxLag); + + assert(leftfixLag <= left->maxLag); + + struct mq *q = tctxtToScratch(tctxt)->queues + qi; + + u64a start = ~0ULL; + + /* switch the callback + context for a fun one */ + q->som_cb = roseNfaEarliestSom; + q->context = &start; + + nfaReportCurrentMatches(q->nfa, q); + + /* restore the old callback + context */ + q->som_cb = roseNfaSomAdaptor; + q->context = NULL; + DEBUG_PRINTF("earliest som is %llu\n", start); + return start; +} + +static rose_inline +char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { + assert(max_bound <= ROSE_BOUND_INF); + assert(min_bound <= max_bound); + + if (end < min_bound) { + return 0; + } + return max_bound == ROSE_BOUND_INF || end <= max_bound; +} + + +#define PROGRAM_CASE(name) \ + case ROSE_INSTR_##name: { \ + DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \ + const struct ROSE_STRUCT_##name *ri = \ + (const struct ROSE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + +static really_inline +hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, + u64a end, struct RoseContext *tctxt, + char in_anchored, int *work_done) { + DEBUG_PRINTF("program begins at offset %u\n", programOffset); + + assert(programOffset); + assert(programOffset < t->size); + + const char *pc_base = getByOffset(t, programOffset); + const char *pc = pc_base; + + u64a som = 0; + + assert(*(const u8 *)pc != ROSE_INSTR_END); + + for (;;) { + assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); + u8 code = *(const u8 *)pc; + assert(code <= ROSE_INSTR_END); + + switch ((enum RoseInstructionCode)code) { + PROGRAM_CASE(ANCHORED_DELAY) { + if (in_anchored && end > t->floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("delay until playback\n"); + update_depth(tctxt, ri->depth); + tctxt->groups |= ri->groups; + *work_done = 1; + assert(ri->done_jump); // must progress + pc += ri->done_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_DEPTH) { + DEBUG_PRINTF("current depth %u, check min depth %u\n", + tctxt->depth, ri->min_depth); + if (ri->min_depth > tctxt->depth) { + DEBUG_PRINTF("failed depth check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ONLY_EOD) { + struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + if (end != ci->buf_offset + ci->len) { + DEBUG_PRINTF("should only match at end of data\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_BOUNDS) { + if (!in_anchored && + !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) { + DEBUG_PRINTF("failed root bounds check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_NOT_HANDLED) { + struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles; + if (fatbit_set(handled, t->handledKeyCount, ri->key)) { + DEBUG_PRINTF("key %u already set\n", ri->key); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LOOKAROUND) { + if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) { + DEBUG_PRINTF("failed lookaround check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LEFTFIX) { + 
if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end,
+                                 tctxt)) {
+                DEBUG_PRINTF("failed leftfix check\n");
+                assert(ri->fail_jump); // must progress
+                pc += ri->fail_jump;
+                continue;
+            }
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(SOM_ADJUST) {
+            assert(ri->distance <= end);
+            som = end - ri->distance;
+            DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, som);
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(SOM_LEFTFIX) {
+            som = roseGetHaigSom(t, ri->queue, ri->lag, tctxt);
+            DEBUG_PRINTF("som from leftfix is %llu\n", som);
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(TRIGGER_INFIX) {
+            roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel,
+                             tctxt);
+            *work_done = 1;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(TRIGGER_SUFFIX) {
+            if (roseHandleSuffixTrigger(t, ri->queue, ri->event, som, end,
+                                        tctxt, in_anchored) ==
+                HWLM_TERMINATE_MATCHING) {
+                return HWLM_TERMINATE_MATCHING;
+            }
+            *work_done = 1;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(REPORT) {
+            if (roseHandleMatch(t, tctxt->state, ri->report, end, tctxt,
+                                in_anchored) == HWLM_TERMINATE_MATCHING) {
+                return HWLM_TERMINATE_MATCHING;
+            }
+            *work_done = 1;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(REPORT_CHAIN) {
+            if (roseCatchUpAndHandleChainMatch(t, tctxt->state, ri->report,
+                                               end, tctxt, in_anchored) ==
+                HWLM_TERMINATE_MATCHING) {
+                return HWLM_TERMINATE_MATCHING;
+            }
+            *work_done = 1;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(REPORT_EOD) {
+            if (tctxt->cb(end, ri->report, tctxt->userCtx) ==
+                MO_HALT_MATCHING) {
+                return HWLM_TERMINATE_MATCHING;
+            }
+            *work_done = 1;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(REPORT_SOM_INT) {
+            if (roseHandleSom(t, tctxt->state, ri->report, end, tctxt,
+                              in_anchored) == HWLM_TERMINATE_MATCHING) {
+                return HWLM_TERMINATE_MATCHING;
+            }
+            *work_done = 1;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(REPORT_SOM) {
+            if (roseHandleSomSom(t, tctxt->state, ri->report, som, end,
+                                 tctxt,
+                                 in_anchored) == HWLM_TERMINATE_MATCHING) {
+                return HWLM_TERMINATE_MATCHING;
+            }
+            *work_done = 1;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(REPORT_SOM_KNOWN) {
+            if (roseHandleSomMatch(t, tctxt->state, ri->report, som, end,
+                                   tctxt, in_anchored) ==
+                HWLM_TERMINATE_MATCHING) {
+                return HWLM_TERMINATE_MATCHING;
+            }
+            *work_done = 1;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(SET_STATE) {
+            roseSetRole(t, tctxt->state, tctxt, ri->index, ri->depth);
+            *work_done = 1;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(SET_GROUPS) {
+            tctxt->groups |= ri->groups;
+            DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups,
+                         tctxt->groups);
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(SPARSE_ITER_BEGIN) {
+            DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset);
+            const struct mmbit_sparse_iter *it =
+                getByOffset(t, ri->iter_offset);
+            assert(ISALIGNED(it));
+
+            struct hs_scratch *scratch = tctxtToScratch(tctxt);
+            struct mmbit_sparse_state *s = scratch->sparse_iter_state;
+
+            u32 idx = 0;
+            u32 i = mmbit_sparse_iter_begin(getRoleState(tctxt->state),
+                                            t->rolesWithStateCount, &idx,
+                                            it, s);
+            if (i == MMB_INVALID) {
+                DEBUG_PRINTF("no states in sparse iter are on\n");
+                assert(ri->fail_jump); // must progress
+                pc += ri->fail_jump;
+                continue;
+            }
+
+            fatbit_clear(scratch->handled_roles);
+
+            const u32 *jumps = getByOffset(t, ri->jump_table);
+            DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx,
+                         jumps[idx]);
+            pc = pc_base + jumps[idx];
+            continue;
+        }
+        PROGRAM_NEXT_INSTRUCTION
+
+        PROGRAM_CASE(SPARSE_ITER_NEXT) {
DEBUG_PRINTF("iter_offset=%u, state=%u\n", ri->iter_offset, + ri->state); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct mmbit_sparse_state *s = scratch->sparse_iter_state; + + u32 idx = 0; + u32 i = mmbit_sparse_iter_next(getRoleState(tctxt->state), + t->rolesWithStateCount, + ri->state, &idx, it, s); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no more states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + + const u32 *jumps = getByOffset(t, ri->jump_table); + DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, + jumps[idx]); + pc = pc_base + jumps[idx]; + continue; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(END) { + DEBUG_PRINTF("finished\n"); + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION + } + } + + assert(0); // unreachable + return HWLM_CONTINUE_MATCHING; +} + +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION + +static rose_inline +void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) { + assert(tl->squashesGroup); + + // we should be squashing a single group + assert(popcount64(tl->groups) == 1); + + DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n", + ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups); + + tctxt->groups &= ~tl->groups; +} + +#endif // PROGRAM_RUNTIME_H diff --git a/src/rose/rose.h b/src/rose/rose.h index 6bebdf100..491449888 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -45,6 +45,39 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback, void *context); +static really_inline +int roseBlockHasEodWork(const struct RoseEngine *t, + struct hs_scratch *scratch) { + if (t->ematcherOffset) { + DEBUG_PRINTF("eod matcher to run\n"); + return 1; + } + + if (t->eodProgramOffset) { + DEBUG_PRINTF("has eod program\n"); + return 1; + } + + void *state = scratch->core_info.state; + if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { + DEBUG_PRINTF("active outfix/suffix engines\n"); + return 1; + } + + if (t->eodIterOffset) { + u32 idx; + const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset); + struct mmbit_sparse_state *s = scratch->sparse_iter_state; + if (mmbit_sparse_iter_begin(getRoleState(state), t->rolesWithStateCount, + &idx, it, s) != MMB_INVALID) { + DEBUG_PRINTF("eod iter has states on\n"); + return 1; + } + } + + return 0; +} + /* assumes core_info in scratch has been init to point to data */ static really_inline void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, @@ -77,19 +110,8 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, return; } - struct mmbit_sparse_state *s = scratch->sparse_iter_state; - const u32 numStates = t->rolesWithStateCount; - u8 *state = (u8 *)scratch->core_info.state; - void *role_state = getRoleState(state); - u32 idx = 0; - const struct mmbit_sparse_iter *it - = (const void *)((const u8 *)t + t->eodIterOffset); - - if (!t->ematcherOffset && !t->hasEodEventLiteral - && !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount) - && (!t->eodIterOffset - || mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s) - == MMB_INVALID)) { + if (!roseBlockHasEodWork(t, scratch)) { + DEBUG_PRINTF("no eod work\n"); return; } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 
708d3c8a8..0a0318d30 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -170,6 +170,7 @@ class RoseInstruction { const void *get() const { switch (code()) { + case ROSE_INSTR_CHECK_DEPTH: return &u.checkDepth; case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; @@ -188,6 +189,8 @@ class RoseInstruction { case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; case ROSE_INSTR_SET_STATE: return &u.setState; case ROSE_INSTR_SET_GROUPS: return &u.setGroups; + case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin; + case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; case ROSE_INSTR_END: return &u.end; } assert(0); @@ -196,6 +199,7 @@ class RoseInstruction { size_t length() const { switch (code()) { + case ROSE_INSTR_CHECK_DEPTH: return sizeof(u.checkDepth); case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); @@ -214,12 +218,15 @@ class RoseInstruction { case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); case ROSE_INSTR_SET_STATE: return sizeof(u.setState); case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); + case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin); + case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); case ROSE_INSTR_END: return sizeof(u.end); } return 0; } union { + ROSE_STRUCT_CHECK_DEPTH checkDepth; ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; ROSE_STRUCT_CHECK_BOUNDS checkBounds; ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; @@ -238,6 +245,8 @@ class RoseInstruction { ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; ROSE_STRUCT_SET_STATE setState; ROSE_STRUCT_SET_GROUPS setGroups; + ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin; + ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; ROSE_STRUCT_END end; } u; }; @@ -2565,7 +2574,7 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { */ static vector -flattenRoleProgram(const vector> &programs) { +flattenProgram(const vector> &programs) { vector out; vector offsets; // offset of each instruction (bytes) @@ -2601,6 +2610,10 @@ flattenRoleProgram(const vector> &programs) { assert(targets[i] > offsets[i]); // jumps always progress ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; break; + case ROSE_INSTR_CHECK_DEPTH: + assert(targets[i] > offsets[i]); + ri.u.checkDepth.fail_jump = targets[i] - offsets[i]; + break; case ROSE_INSTR_CHECK_ONLY_EOD: assert(targets[i] > offsets[i]); ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; @@ -2630,9 +2643,13 @@ flattenRoleProgram(const vector> &programs) { } static -u32 writeRoleProgram(build_context &bc, vector &program) { - DEBUG_PRINTF("writing %zu instructions\n", program.size()); +u32 writeProgram(build_context &bc, vector &program) { + if (program.empty()) { + DEBUG_PRINTF("no program\n"); + return 0; + } + DEBUG_PRINTF("writing %zu instructions\n", program.size()); u32 programOffset = 0; for (const auto &ri : program) { u32 offset = @@ -2696,32 +2713,6 @@ bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc, return false; } -/* creates (and adds to rose) a sparse iterator visiting pred states/roles, - * returns a pair: - * - the offset of the itermap - * - the offset for the sparse iterator. 
- */ -static -pair addPredSparseIter(build_context &bc, - const map &predPrograms) { - vector keys; - vector programTable; - for (const auto &elem : predPrograms) { - keys.push_back(elem.first); - programTable.push_back(elem.second); - } - - vector iter; - mmbBuildSparseIterator(iter, keys, bc.numStates); - assert(!iter.empty()); - DEBUG_PRINTF("iter size = %zu\n", iter.size()); - - u32 iterOffset = addIteratorToTable(bc, iter); - u32 programTableOffset = - add_to_engine_blob(bc, begin(programTable), end(programTable)); - return make_pair(programTableOffset, iterOffset); -} - static void fillLookaroundTables(char *look_base, char *reach_base, const vector &look_vec) { @@ -2770,7 +2761,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, * literal entry */ const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id); const rose_literal_info &arb_lit_info = **lit_infos.begin(); - const auto &vertices = arb_lit_info.vertices; literalTable.push_back(RoseLiteral()); RoseLiteral &tl = literalTable.back(); @@ -2784,11 +2774,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED || tbi.literals.right.at(literalId).table == ROSE_EVENT); - // Minimum depth based on this literal's roles. - tl.minDepth = calcMinDepth(bc.depths, vertices); - - DEBUG_PRINTF("lit %u: role minDepth=%u\n", final_id, tl.minDepth); - // If this literal squashes its group behind it, store that data too tl.squashesGroup = arb_lit_info.squash_group; @@ -3150,8 +3135,8 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, } static -vector makeRoleProgram(RoseBuildImpl &build, build_context &bc, - const RoseEdge &e) { +vector makeProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e) { const RoseGraph &g = build.g; auto v = target(e, g); @@ -3185,69 +3170,6 @@ vector makeRoleProgram(RoseBuildImpl &build, build_context &bc, return program; } -static -void findRootEdges(const RoseBuildImpl &build, RoseVertex src, - map> &root_edges_map) { - const auto &g = build.g; - for (const auto &e : out_edges_range(src, g)) { - const auto &v = target(e, g); - if (build.hasDirectFinalId(v)) { - continue; // Skip direct reports. - } - for (auto lit_id : g[v].literals) { - assert(lit_id < build.literal_info.size()); - u32 final_id = build.literal_info.at(lit_id).final_id; - if (final_id != MO_INVALID_IDX) { - root_edges_map[final_id].insert(e); - } - } - } -} - -static -void buildRootRolePrograms(RoseBuildImpl &build, build_context &bc, - vector &literalTable) { - const auto &g = build.g; - - map> root_edges_map; // lit id -> root edges - findRootEdges(build, build.root, root_edges_map); - findRootEdges(build, build.anchored_root, root_edges_map); - - for (u32 id = 0; id < literalTable.size(); id++) { - const auto &root_edges = root_edges_map[id]; - DEBUG_PRINTF("lit %u has %zu root edges\n", id, root_edges.size()); - - // Sort edges by (source, target) vertex indices to ensure - // deterministic program construction. 
- vector ordered_edges(begin(root_edges), end(root_edges)); - sort(begin(ordered_edges), end(ordered_edges), - [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].idx, g[target(a, g)].idx) < - tie(g[source(b, g)].idx, g[target(b, g)].idx); - }); - - vector> root_prog; - for (const auto &e : ordered_edges) { - DEBUG_PRINTF("edge (%zu,%zu)\n", g[source(e, g)].idx, - g[target(e, g)].idx); - auto role_prog = makeRoleProgram(build, bc, e); - if (role_prog.empty()) { - continue; - } - root_prog.push_back(role_prog); - } - - RoseLiteral &tl = literalTable[id]; - if (root_prog.empty()) { - tl.rootProgramOffset = 0; - continue; - } - - auto final_program = flattenRoleProgram(root_prog); - tl.rootProgramOffset = writeRoleProgram(bc, final_program); - } -} - static void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { const auto &g = build.g; @@ -3399,13 +3321,12 @@ void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, } static -vector makeSparseIterProgram(RoseBuildImpl &build, - build_context &bc, - const RoseEdge &e) { +vector makePredProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e) { const RoseGraph &g = build.g; const RoseVertex v = target(e, g); - auto program = makeRoleProgram(build, bc, e); + auto program = makeProgram(build, bc, e); if (hasGreaterInDegree(1, v, g)) { // Only necessary when there is more than one pred. @@ -3415,75 +3336,215 @@ vector makeSparseIterProgram(RoseBuildImpl &build, return program; } +/** + * Returns the pair (program offset, sparse iter offset). + */ static -void buildLitSparseIter(RoseBuildImpl &build, build_context &bc, - vector &verts, RoseLiteral &tl) { - const auto &g = build.g; +pair makeSparseIterProgram(build_context &bc, + map>> &predProgramLists, + const vector &verts, + const vector &root_program) { + vector program; + u32 iter_offset = 0; + + if (!predProgramLists.empty()) { + // First, add the iterator itself. + vector keys; + for (const auto &elem : predProgramLists) { + keys.push_back(elem.first); + } + DEBUG_PRINTF("%zu keys: %s\n", keys.size(), + as_string_list(keys).c_str()); + + vector iter; + mmbBuildSparseIterator(iter, keys, bc.numStates); + assert(!iter.empty()); + iter_offset = addIteratorToTable(bc, iter); + + // Construct our program, starting with the SPARSE_ITER_BEGIN + // instruction, keeping track of the jump offset for each sub-program. + vector jump_table; + u32 curr_offset = 0; + + // Add a pre-check for min depth, if it's useful. + if (!verts.empty()) { + u32 min_depth = calcMinDepth(bc.depths, verts); + if (min_depth > 1) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_DEPTH); + ri.u.checkDepth.min_depth = min_depth; + program.push_back(ri); + curr_offset = ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + } - if (verts.empty()) { - // This literal has no non-root roles => no sparse iter - tl.iterOffset = ROSE_OFFSET_INVALID; - tl.iterProgramOffset = 0; - return; - } + program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN)); + curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); - // Deterministic ordering. 
- sort(begin(verts), end(verts), - [&g](RoseVertex a, RoseVertex b) { return g[a].idx < g[b].idx; }); + for (const auto &e : predProgramLists) { + DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), + curr_offset); + jump_table.push_back(curr_offset); + auto subprog = flattenProgram(e.second); - // pred state id -> list of programs - map>> predProgramLists; + if (e.first != keys.back()) { + // For all but the last subprogram, replace the END instruction + // with a SPARSE_ITER_NEXT. + assert(!subprog.empty()); + assert(subprog.back().code() == ROSE_INSTR_END); + subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT); + } - for (const auto &v : verts) { - DEBUG_PRINTF("vertex %zu\n", g[v].idx); - for (const auto &e : in_edges_range(v, g)) { - const auto &u = source(e, g); - if (build.isAnyStart(u)) { - continue; // Root roles are not handled with sparse iterator. + for (const auto &ri : subprog) { + program.push_back(ri); + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } + } - assert(contains(bc.roleStateIndices, u)); - u32 pred_state = bc.roleStateIndices.at(u); + const u32 end_offset = curr_offset - ROUNDUP_N(program.back().length(), + ROSE_INSTR_MIN_ALIGN); - DEBUG_PRINTF("pred %zu (state %u)\n", g[u].idx, pred_state); + // Write the jump table into the bytecode. + const u32 jump_table_offset = + add_to_engine_blob(bc, begin(jump_table), end(jump_table)); - auto program = makeSparseIterProgram(build, bc, e); - predProgramLists[pred_state].push_back(program); + // Fix up the instruction operands. + auto keys_it = begin(keys); + curr_offset = 0; + for (size_t i = 0; i < program.size(); i++) { + auto &ri = program[i]; + switch (ri.code()) { + case ROSE_INSTR_CHECK_DEPTH: + ri.u.checkDepth.fail_jump = end_offset - curr_offset; + break; + case ROSE_INSTR_SPARSE_ITER_BEGIN: + ri.u.sparseIterBegin.iter_offset = iter_offset; + ri.u.sparseIterBegin.jump_table = jump_table_offset; + ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset; + break; + case ROSE_INSTR_SPARSE_ITER_NEXT: + ri.u.sparseIterNext.iter_offset = iter_offset; + ri.u.sparseIterNext.jump_table = jump_table_offset; + assert(keys_it != end(keys)); + ri.u.sparseIterNext.state = *keys_it++; + ri.u.sparseIterNext.fail_jump = end_offset - curr_offset; + break; + default: + break; + } + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } } - map predPrograms; - for (const auto &e : predProgramLists) { - auto program = flattenRoleProgram(e.second); - u32 offset = writeRoleProgram(bc, program); - predPrograms.emplace(e.first, offset); + // If we have a root program, replace the END instruction with it. Note + // that the root program has already been flattened. + if (!root_program.empty()) { + if (!program.empty()) { + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + } + program.insert(end(program), begin(root_program), end(root_program)); } - tie(tl.iterProgramOffset, tl.iterOffset) = - addPredSparseIter(bc, predPrograms); + return {writeProgram(bc, program), iter_offset}; } -// Build sparse iterators for literals. static -void buildSparseIter(RoseBuildImpl &build, build_context &bc, - vector &literalTable) { - const RoseGraph &g = build.g; +u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, + const vector &lit_edges) { + const auto &g = build.g; + + DEBUG_PRINTF("%zu lit edges\n", lit_edges.size()); + + // pred state id -> list of programs + map>> predProgramLists; + vector nonroot_verts; + + // Construct sparse iter sub-programs. 
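/*
 * The operand fix-up pass in makeSparseIterProgram above is classic
 * backpatching: instructions are first laid out to learn their aligned
 * byte offsets (via ROUNDUP_N), then relative jump operands are filled
 * in as target_offset - instruction_offset. A self-contained sketch of
 * the two passes; struct instr and the lengths are illustrative.
 */
#include <stdio.h>

#define ALIGN 8
#define ROUNDUP(x) (((x) + ALIGN - 1) & ~(unsigned)(ALIGN - 1))

struct instr {
    unsigned len;       /* unaligned instruction length in bytes */
    unsigned fail_jump; /* relative offset taken on failure */
};

int main(void) {
    struct instr prog[3] = { { 12, 0 }, { 20, 0 }, { 4, 0 } }; /* last is END */
    unsigned offsets[3];

    /* pass 1: lay out, recording each instruction's aligned offset */
    unsigned curr = 0;
    for (int i = 0; i < 3; i++) {
        offsets[i] = curr;
        curr += ROUNDUP(prog[i].len);
    }
    unsigned end_offset = offsets[2];

    /* pass 2: backpatch failure jumps so they land on END */
    for (int i = 0; i < 2; i++) {
        prog[i].fail_jump = end_offset - offsets[i];
        printf("instr %d at offset %u: fail_jump +%u\n", i, offsets[i],
               prog[i].fail_jump);
    }
    return 0;
}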
+ for (const auto &e : lit_edges) { + const auto &u = source(e, g); + if (build.isAnyStart(u)) { + continue; // Root roles are not handled with sparse iterator. + } + DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].idx, + g[target(e, g)].idx); + assert(contains(bc.roleStateIndices, u)); + u32 pred_state = bc.roleStateIndices.at(u); + auto program = makePredProgram(build, bc, e); + predProgramLists[pred_state].push_back(program); + nonroot_verts.push_back(target(e, g)); + } + + // Construct sub-program for handling root roles. + vector> root_programs; + for (const auto &e : lit_edges) { + const auto &u = source(e, g); + if (!build.isAnyStart(u)) { + continue; + } + DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].idx, g[target(e, g)].idx); + auto role_prog = makeProgram(build, bc, e); + if (role_prog.empty()) { + continue; + } + root_programs.push_back(role_prog); + } + + vector root_program; + if (!root_programs.empty()) { + root_program = flattenProgram(root_programs); + } + + // Put it all together. + return makeSparseIterProgram(bc, predProgramLists, nonroot_verts, + root_program).first; +} + +static +map> findEdgesByLiteral(const RoseBuildImpl &build) { + // Use a set of edges while building the map to cull duplicates. + map> unique_lit_edge_map; - // Find all our non-root roles. - ue2::unordered_map> litNonRootVertices; - for (const auto &v : vertices_range(g)) { - if (build.isRootSuccessor(v)) { + const auto &g = build.g; + for (const auto &e : edges_range(g)) { + const auto &v = target(e, g); + if (build.hasDirectFinalId(v)) { + // Skip direct reports, which do not have RoseLiteral entries. continue; } for (const auto &lit_id : g[v].literals) { + assert(lit_id < build.literal_info.size()); u32 final_id = build.literal_info.at(lit_id).final_id; - litNonRootVertices[final_id].push_back(v); + if (final_id != MO_INVALID_IDX) { + unique_lit_edge_map[final_id].insert(e); + } } } + // Build output map, sorting edges by (source, target) vertex index. + map> lit_edge_map; + for (const auto &m : unique_lit_edge_map) { + auto edge_list = vector(begin(m.second), end(m.second)); + sort(begin(edge_list), end(edge_list), + [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].idx, g[target(a, g)].idx) < + tie(g[source(b, g)].idx, g[target(b, g)].idx); + }); + lit_edge_map.emplace(m.first, edge_list); + } + + return lit_edge_map; +} + +/** \brief Build the interpreter program for each literal. */ +static +void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, + vector &literalTable) { + auto lit_edge_map = findEdgesByLiteral(build); + for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) { - buildLitSparseIter(build, bc, litNonRootVertices[finalId], - literalTable[finalId]); + const auto &lit_edges = lit_edge_map[finalId]; + u32 offset = buildLiteralProgram(build, bc, lit_edges); + literalTable[finalId].programOffset = offset; } } @@ -3514,9 +3575,11 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, return program; } -/* returns a pair containing the iter map offset and iter offset */ +/** + * Returns the pair (program offset, sparse iter offset). 
+ */ static -pair buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) { +pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { const RoseGraph &g = build.g; // pred state id -> list of programs @@ -3546,15 +3609,35 @@ pair buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) { return {0, 0}; } - map predPrograms; - for (const auto &e : predProgramLists) { - DEBUG_PRINTF("pred %u has %zu programs\n", e.first, e.second.size()); - auto program = flattenRoleProgram(e.second); - u32 offset = writeRoleProgram(bc, program); - predPrograms.emplace(e.first, offset); + return makeSparseIterProgram(bc, predProgramLists, {}, {}); +} + +static +u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) { + if (build.eod_event_literal_id == MO_INVALID_IDX) { + return 0; } - return addPredSparseIter(bc, predPrograms); + const RoseGraph &g = build.g; + const auto &lit_info = build.literal_info.at(build.eod_event_literal_id); + assert(lit_info.delayed_ids.empty()); + assert(!lit_info.squash_group); + assert(!lit_info.requires_benefits); + + // Collect all edges leading into EOD event literal vertices. + vector edge_list; + for (const auto &v : lit_info.vertices) { + insert(&edge_list, edge_list.end(), in_edges(v, g)); + } + + // Sort edge list for determinism, prettiness. + sort(begin(edge_list), end(edge_list), + [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].idx, g[target(a, g)].idx) < + tie(g[source(b, g)].idx, g[target(b, g)].idx); + }); + + return buildLiteralProgram(build, bc, edge_list); } static @@ -3742,11 +3825,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { vector literalTable; buildLiteralTable(*this, bc, literalTable); - buildSparseIter(*this, bc, literalTable); + buildLiteralPrograms(*this, bc, literalTable); + u32 eodProgramOffset = writeEodProgram(*this, bc); + u32 eodIterProgramOffset; u32 eodIterOffset; - u32 eodProgramTableOffset; - tie(eodProgramTableOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc); + tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc); vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); @@ -3758,9 +3842,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - // Write root programs for literals into the engine blob. 
- buildRootRolePrograms(*this, bc, literalTable); - u32 amatcherOffset = 0; u32 fmatcherOffset = 0; u32 ematcherOffset = 0; @@ -3968,8 +4049,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { = anchoredReportInverseMapOffset; engine->multidirectOffset = multidirectOffset; + engine->eodProgramOffset = eodProgramOffset; + engine->eodIterProgramOffset = eodIterProgramOffset; engine->eodIterOffset = eodIterOffset; - engine->eodProgramTableOffset = eodProgramTableOffset; engine->lastByteHistoryIterOffset = lastByteOffset; @@ -4038,13 +4120,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); - if (eod_event_literal_id != MO_INVALID_IDX) { - engine->hasEodEventLiteral = 1; - DEBUG_PRINTF("eod literal id=%u, final_id=%u\n", eod_event_literal_id, - literal_info.at(eod_event_literal_id).final_id); - engine->eodLiteralId = literal_info.at(eod_event_literal_id).final_id; - } - if (anchoredIsMulti(*engine)) { DEBUG_PRINTF("multiple anchored dfas\n"); engine->maxSafeAnchoredDROffset = 1; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 2a31a65a9..2a3fe5406 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -274,6 +274,13 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { continue; } + // The special EOD event literal has its own program and does not need + // a real literal ID. + if (i == tbi.eod_event_literal_id) { + assert(tbi.eod_event_literal_id != MO_INVALID_IDX); + continue; + } + const rose_literal_info &info = tbi.literal_info[i]; if (info.requires_benefits) { assert(!tbi.isDelayed(i)); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index d8048eee9..96ff77347 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -34,7 +34,6 @@ #include "rose_build_impl.h" #include "rose/rose_dump.h" #include "rose_internal.h" -#include "rose_program.h" #include "ue2common.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_dump.h" diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index aa13a627c..3f355287c 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -130,12 +130,6 @@ size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) { return count_if(tl, tl_end, pred); } -static -size_t literalsWithDepth(const RoseEngine *t, u8 depth) { - return literalsWithPredicate( - t, [&depth](const RoseLiteral &l) { return l.minDepth == depth; }); -} - static size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) { rose_group mask = ~((1ULL << from) - 1); @@ -195,7 +189,7 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, } static -void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { +void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *pc_base = pc; for (;;) { u8 code = *(const u8 *)pc; @@ -209,6 +203,12 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_DEPTH) { + os << " min_depth " << u32{ri->min_depth} << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_ONLY_EOD) { os << " fail_jump +" << ri->fail_jump << endl; } @@ -309,6 +309,21 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_BEGIN) { + os << " iter_offset " << ri->iter_offset << endl; + os << " 
jump_table " << ri->jump_table << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_NEXT) { + os << " iter_offset " << ri->iter_offset << endl; + os << " jump_table " << ri->jump_table << endl; + os << " state " << ri->state << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(END) { return; } PROGRAM_NEXT_INSTRUCTION @@ -323,30 +338,6 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { #undef PROGRAM_CASE #undef PROGRAM_NEXT_INSTRUCTION -static -void dumpSparseIterPrograms(ofstream &os, const RoseEngine *t, u32 iterOffset, - u32 programTableOffset) { - const auto *it = - (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, iterOffset); - const u32 *programTable = - (const u32 *)loadFromByteCodeOffset(t, programTableOffset); - - // Construct a full multibit. - const u32 total_bits = t->rolesWithStateCount; - const vector bits(mmbit_size(total_bits), u8{0xff}); - - struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES]; - u32 idx = 0; - for (u32 i = mmbit_sparse_iter_begin(bits.data(), total_bits, &idx, it, s); - i != MMB_INVALID; - i = mmbit_sparse_iter_next(bits.data(), total_bits, i, &idx, it, s)) { - u32 programOffset = programTable[idx]; - os << "Sparse Iter Program " << idx << " triggered by state " << i - << " @ " << programOffset << ":" << endl; - dumpRoleProgram(os, t, (const char *)t + programOffset); - } -} - static void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); @@ -359,18 +350,11 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { os << "Literal " << i << endl; os << "---------------" << endl; - if (lit->rootProgramOffset) { - os << "Root Program @ " << lit->rootProgramOffset << ":" << endl; - dumpRoleProgram(os, t, base + lit->rootProgramOffset); - } else { - os << "" << endl; - } - - if (lit->iterOffset != ROSE_OFFSET_INVALID) { - dumpSparseIterPrograms(os, t, lit->iterOffset, - lit->iterProgramOffset); + if (lit->programOffset) { + os << "Program @ " << lit->programOffset << ":" << endl; + dumpProgram(os, t, base + lit->programOffset); } else { - os << "" << endl; + os << "" << endl; } os << endl; @@ -382,12 +366,23 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { static void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); + const char *base = (const char *)t; + + os << "Unconditional EOD Program:" << endl; - if (t->eodIterOffset) { - dumpSparseIterPrograms(os, t, t->eodIterOffset, - t->eodProgramTableOffset); + if (t->eodProgramOffset) { + dumpProgram(os, t, base + t->eodProgramOffset); + os << endl; + } else { + os << "" << endl; + } + + os << "Sparse Iter EOD Program:" << endl; + + if (t->eodIterProgramOffset) { + dumpProgram(os, t, base + t->eodIterProgramOffset); } else { - os << "" << endl; + os << "" << endl; } os.close(); @@ -766,33 +761,15 @@ void roseDumpText(const RoseEngine *t, FILE *f) { literalsWithPredicate( t, [](const RoseLiteral &l) { return l.squashesGroup != 0; })); fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id); - fprintf(f, " - with root program : %zu\n", - literalsWithPredicate(t, [](const RoseLiteral &l) { - return l.rootProgramOffset != 0; - })); - fprintf(f, " - with sparse iter : %zu\n", - literalsWithPredicate(t, [](const RoseLiteral &l) { - return l.iterOffset != ROSE_OFFSET_INVALID; - })); + fprintf(f, " - with program : %zu\n", + literalsWithPredicate( + t, 
[](const RoseLiteral &l) { return l.programOffset != 0; })); fprintf(f, " - in groups ::\n"); fprintf(f, " + weak : %zu\n", literalsInGroups(t, 0, t->group_weak_end)); fprintf(f, " + general : %zu\n", literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8)); - u32 depth1 = literalsWithDepth(t, 1); - u32 depth2 = literalsWithDepth(t, 2); - u32 depth3 = literalsWithDepth(t, 3); - u32 depth4 = literalsWithDepth(t, 4); - u32 depthN = t->literalCount - (depth1 + depth2 + depth3 + depth4); - - fprintf(f, "\nLiteral depths:\n"); - fprintf(f, " minimum depth 1 : %u\n", depth1); - fprintf(f, " minimum depth 2 : %u\n", depth2); - fprintf(f, " minimum depth 3 : %u\n", depth3); - fprintf(f, " minimum depth 4 : %u\n", depth4); - fprintf(f, " minimum depth >4 : %u\n", depthN); - fprintf(f, "\n"); fprintf(f, " minWidth : %u\n", t->minWidth); fprintf(f, " minWidthExcludingBoundaries : %u\n", @@ -840,7 +817,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U8(t, hasFloatingDirectReports); DUMP_U8(t, noFloatingRoots); DUMP_U8(t, requiresEodCheck); - DUMP_U8(t, hasEodEventLiteral); DUMP_U8(t, hasOutfixesInSmallBlock); DUMP_U8(t, runtimeImpl); DUMP_U8(t, mpvTriggeredByLeaf); @@ -882,8 +858,9 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, roseCount); DUMP_U32(t, lookaroundTableOffset); DUMP_U32(t, lookaroundReachOffset); + DUMP_U32(t, eodProgramOffset); + DUMP_U32(t, eodIterProgramOffset); DUMP_U32(t, eodIterOffset); - DUMP_U32(t, eodProgramTableOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); DUMP_U32(t, minWidthExcludingBoundaries); @@ -940,7 +917,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, somRevOffsetOffset); DUMP_U32(t, group_weak_end); DUMP_U32(t, floatingStreamState); - DUMP_U32(t, eodLiteralId); fprintf(f, "}\n"); fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); } diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 7aae2f22c..6234bb215 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -76,38 +76,15 @@ ReportID literalToReport(u32 id) { /** \brief Structure representing a literal. */ struct RoseLiteral { /** - * \brief Role program to run unconditionally when this literal is seen. + * \brief Program to run when this literal is seen. * * Offset is relative to RoseEngine, or zero for no program. */ - u32 rootProgramOffset; - - /** - * \brief Offset of sparse iterator (mmbit_sparse_iter pointer) over - * predecessor states. - * - * Offset is relative to RoseEngine, set to ROSE_OFFSET_INVALID for no - * iterator. - */ - u32 iterOffset; - - /** - * \brief Table of role programs to run when triggered by the sparse - * iterator, indexed by dense sparse iter index. - * - * Offset is relative to RoseEngine, zero for no programs. - */ - u32 iterProgramOffset; + u32 programOffset; /** \brief Bitset of groups that cause this literal to fire. */ rose_group groups; - /** - * \brief The minimum depth of this literal in the Rose graph (for depths - * greater than 1). - */ - u8 minDepth; - /** * \brief True if this literal switches off its group behind it when it * sets a role. @@ -382,7 +359,6 @@ struct RoseEngine { u8 noFloatingRoots; /* only need to run the anchored table if something * matched in the anchored table */ u8 requiresEodCheck; /* stuff happens at eod time */ - u8 hasEodEventLiteral; // fires a ROSE_EVENT literal at eod time. u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even in small block scans. 
*/ u8 runtimeImpl; /**< can we just run the floating table or a single outfix? @@ -448,8 +424,9 @@ struct RoseEngine { u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32 * bytes each) */ - u32 eodIterOffset; // or 0 if no eod iterator - u32 eodProgramTableOffset; + u32 eodProgramOffset; //!< Unconditional EOD program, otherwise 0. + u32 eodIterProgramOffset; // or 0 if no eod iterator program + u32 eodIterOffset; // offset to EOD sparse iter or 0 if none u32 lastByteHistoryIterOffset; // if non-zero @@ -512,7 +489,6 @@ struct RoseEngine { u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ u32 group_weak_end; /* end of weak groups, debugging only */ u32 floatingStreamState; // size in bytes - u32 eodLiteralId; // literal ID for eod ROSE_EVENT if used, otherwise 0. struct scatter_full_plan state_init; }; diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index ee747b9d6..f7028c722 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -42,6 +42,7 @@ /** \brief Role program instruction opcodes. */ enum RoseInstructionCode { ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + ROSE_INSTR_CHECK_DEPTH, //!< Check minimum graph depth. ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". @@ -59,43 +60,51 @@ enum RoseInstructionCode { ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. ROSE_INSTR_SET_STATE, //!< Switch a state index on. ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. + ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states. + ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states. ROSE_INSTR_END //!< End of program. }; struct ROSE_STRUCT_ANCHORED_DELAY { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u8 depth; //!< Depth for this state. rose_group groups; //!< Bitmask. u32 done_jump; //!< Jump forward this many bytes if successful. }; +struct ROSE_STRUCT_CHECK_DEPTH { + u8 code; //!< From enum RoseInstructionCode. + u8 min_depth; //!< Minimum depth of this literal in the Rose graph. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_CHECK_ONLY_EOD { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 fail_jump; //!< Jump forward this many bytes on failure. }; struct ROSE_STRUCT_CHECK_BOUNDS { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 min_bound; //!< Min distance from zero. u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF). u32 fail_jump; //!< Jump forward this many bytes on failure. }; struct ROSE_STRUCT_CHECK_NOT_HANDLED { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 key; //!< Key in the "handled_roles" fatbit in scratch. u32 fail_jump; //!< Jump forward this many bytes if we have seen key before. }; struct ROSE_STRUCT_CHECK_LOOKAROUND { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 index; u32 count; u32 fail_jump; //!< Jump forward this many bytes on failure. }; struct ROSE_STRUCT_CHECK_LEFTFIX { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 queue; //!< Queue of leftfix to check. u32 lag; //!< Lag of leftfix for this case. 
ReportID report; //!< ReportID of leftfix to check. @@ -103,72 +112,95 @@ struct ROSE_STRUCT_CHECK_LEFTFIX { }; struct ROSE_STRUCT_SOM_ADJUST { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 distance; //!< Distance to EOM. }; struct ROSE_STRUCT_SOM_LEFTFIX { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 queue; //!< Queue index of leftfix providing SOM. u32 lag; //!< Lag of leftfix for this case. }; struct ROSE_STRUCT_TRIGGER_INFIX { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u8 cancel; //!< Cancels previous top event. u32 queue; //!< Queue index of infix. u32 event; //!< Queue event, from MQE_*. }; struct ROSE_STRUCT_TRIGGER_SUFFIX { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 queue; //!< Queue index of suffix. u32 event; //!< Queue event, from MQE_*. }; struct ROSE_STRUCT_REPORT { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_CHAIN { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_EOD { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_SOM_INT { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_SOM { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_SOM_KNOWN { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_SET_STATE { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u8 depth; //!< Depth for this state. u32 index; //!< State index in multibit. }; struct ROSE_STRUCT_SET_GROUPS { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. rose_group groups; //!< Bitmask. }; +/** + * Note that the offsets in the jump table are always relative to the start of + * the program, not the current instruction. + */ +struct ROSE_STRUCT_SPARSE_ITER_BEGIN { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. + u32 jump_table; //!< Offset of jump table indexed by sparse iterator. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +/** + * Note that the offsets in the jump table are always relative to the start of + * the program, not the current instruction. + */ +struct ROSE_STRUCT_SPARSE_ITER_NEXT { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. + u32 jump_table; //!< Offset of jump table indexed by sparse iterator. + u32 state; // Current state index. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_END { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. 
}; #endif // ROSE_ROSE_PROGRAM_H From b6508811c03e0fa2ce583674c35f506c40e58306 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 18 Dec 2015 11:48:33 +1100 Subject: [PATCH 014/218] writeEodProgram: avoid make_move_iterator warning Avoid an ambiguity between std:: and boost::make_move_iterator on builds against libc++. --- src/rose/rose_build_bytecode.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 0a0318d30..9ba7fa353 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3627,7 +3627,9 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) { // Collect all edges leading into EOD event literal vertices. vector edge_list; for (const auto &v : lit_info.vertices) { - insert(&edge_list, edge_list.end(), in_edges(v, g)); + for (const auto &e : in_edges_range(v, g)) { + edge_list.push_back(e); + } } // Sort edge list for determinism, prettiness. From b460f47476c7bae036e8cea7a01be3021b3e97ff Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 8 Dec 2015 14:40:20 +1100 Subject: [PATCH 015/218] Build the tools dir only if the cmake file exists --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4034b14ba..54e5c9f72 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -332,7 +332,7 @@ endif() add_subdirectory(util) add_subdirectory(unit) add_subdirectory(doc/dev-reference) -if (EXISTS ${CMAKE_SOURCE_DIR}/tools) +if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt) add_subdirectory(tools) endif() From 04dfed26028c6f8613391a6e7a9e780b32fce107 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 18 Dec 2015 15:50:56 +1100 Subject: [PATCH 016/218] runtime: hoist broken check in streaming mode --- src/runtime.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/runtime.c b/src/runtime.c index 335a83bc5..27740af22 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1184,13 +1184,7 @@ void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { assert(scratch); char *state = getMultiState(stream_state); - - u8 broken = getBroken(state); - if (unlikely(broken)) { - assert(broken == BROKEN_FROM_USER || broken == BROKEN_EXHAUSTED); - scratch->core_info.broken = broken; - return; - } + assert(!getBroken(state)); DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n", stream_state->offset, scratch->core_info.len); @@ -1215,13 +1209,7 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, assert(scratch); char *state = getMultiState(stream_state); - - u8 broken = getBroken(state); - if (unlikely(broken)) { - assert(broken == BROKEN_FROM_USER || broken == BROKEN_EXHAUSTED); - scratch->core_info.broken = broken; - return; - } + assert(!getBroken(state)); const struct RoseEngine *rose = stream_state->rose; const struct HWLM *ftable = getFLiteralMatcher(rose); @@ -1335,6 +1323,16 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, if (!id->offset && rose->boundary.reportZeroOffset) { DEBUG_PRINTF("zero reports\n"); processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch); + broken = getBroken(state); + if (unlikely(broken)) { + DEBUG_PRINTF("stream is broken, halting scan\n"); + if (broken == BROKEN_FROM_USER) { + return HS_SCAN_TERMINATED; + } else { + assert(broken == BROKEN_EXHAUSTED); + return HS_SUCCESS; + } + } } switch (rose->runtimeImpl) { From 
b2a76e6e2bbe68a4db936bb5992c728fdacc9a6a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 23 Dec 2015 15:12:28 +1100 Subject: [PATCH 017/218] roseCheckNfaEod: use sparse iterator for EOD Rather than checking all active outfix/suffix engines, use a sparse iterator to check only those engines that accept at EOD. --- src/rose/eod.c | 33 +++++++++++++++++++++----------- src/rose/rose_build_bytecode.cpp | 25 ++++++++++++++++++++++++ src/rose/rose_dump.cpp | 1 + src/rose/rose_internal.h | 3 +++ 4 files changed, 51 insertions(+), 11 deletions(-) diff --git a/src/rose/eod.c b/src/rose/eod.c index 014b51ca9..a6524f961 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -125,33 +125,44 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset, return MO_CONTINUE_MATCHING; } +/** + * \brief Check for (and deliver) reports from active output-exposed (suffix + * or outfix) NFAs. + */ static rose_inline void roseCheckNfaEod(const struct RoseEngine *t, u8 *state, struct hs_scratch *scratch, u64a offset, const char is_streaming) { - /* data, len is used for state decompress, should be full available data */ - const u8 *aa = getActiveLeafArray(t, state); - const u32 aaCount = t->activeArrayCount; + if (!t->eodNfaIterOffset) { + DEBUG_PRINTF("no engines that report at EOD\n"); + return; + } + /* data, len is used for state decompress, should be full available data */ u8 key = 0; - if (is_streaming) { const u8 *eod_data = scratch->core_info.hbuf; size_t eod_len = scratch->core_info.hlen; key = eod_len ? eod_data[eod_len - 1] : 0; } - for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; - qi = mmbit_iterate(aa, aaCount, qi)) { + const u8 *aa = getActiveLeafArray(t, state); + const u32 aaCount = t->activeArrayCount; + + const struct mmbit_sparse_iter *it = getByOffset(t, t->eodNfaIterOffset); + assert(ISALIGNED(it)); + + u32 idx = 0; + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state); + qi != MMB_INVALID; + qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) { const struct NfaInfo *info = getNfaInfoByQueue(t, qi); const struct NFA *nfa = getNfaByInfo(t, info); - if (!nfaAcceptsEod(nfa)) { - DEBUG_PRINTF("nfa %u does not accept eod\n", qi); - continue; - } - DEBUG_PRINTF("checking nfa %u\n", qi); + assert(nfaAcceptsEod(nfa)); char *fstate = scratch->fullState + info->fullStateOffset; const char *sstate = (const char *)state + info->stateOffset; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 9ba7fa353..9c6a84b3d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2294,6 +2294,29 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, } } +/** Returns sparse iter offset in engine blob. 
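+ *
+ * Usage sketch (not part of the original patch; it mirrors the consuming
+ * loop in roseCheckNfaEod earlier in this change, using that function's
+ * names):
+ *
+ *     u32 idx = 0;
+ *     struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+ *     for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state);
+ *          qi != MMB_INVALID;
+ *          qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) {
+ *         ... only queues whose NFAs accept at EOD are visited ...
+ *     }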
+ */
+static
+u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) {
+    vector<u32> keys;
+    for (u32 qi = 0; qi < activeQueueCount; ++qi) {
+        const NFA *n = get_nfa_from_blob(bc, qi);
+        if (nfaAcceptsEod(n)) {
+            DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi);
+            keys.push_back(qi);
+        }
+    }
+
+    if (keys.empty()) {
+        return 0;
+    }
+
+    DEBUG_PRINTF("building iter for %zu nfas\n", keys.size());
+
+    vector<mmbit_sparse_iter> iter;
+    mmbBuildSparseIterator(iter, keys, activeQueueCount);
+    return addIteratorToTable(bc, iter);
+}
+
 static
 bool hasMpvTrigger(const set<u32> &reports, const ReportManager &rm) {
     for (u32 r : reports) {
@@ -3802,6 +3825,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                          &leftfixBeginQueue)) {
         return nullptr;
     }
+    u32 eodNfaIterOffset = buildEodNfaIterator(bc, leftfixBeginQueue);
     buildCountingMiracles(*this, bc);
 
     u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q;
@@ -4054,6 +4078,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine->eodProgramOffset = eodProgramOffset;
     engine->eodIterProgramOffset = eodIterProgramOffset;
     engine->eodIterOffset = eodIterOffset;
+    engine->eodNfaIterOffset = eodNfaIterOffset;
 
     engine->lastByteHistoryIterOffset = lastByteOffset;
 
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
index 3f355287c..beadd23e2 100644
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ -861,6 +861,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, eodProgramOffset);
     DUMP_U32(t, eodIterProgramOffset);
     DUMP_U32(t, eodIterOffset);
+    DUMP_U32(t, eodNfaIterOffset);
     DUMP_U32(t, lastByteHistoryIterOffset);
     DUMP_U32(t, minWidth);
     DUMP_U32(t, minWidthExcludingBoundaries);
diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h
index 6234bb215..1f927a2cb 100644
--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@@ -428,6 +428,9 @@ struct RoseEngine {
     u32 eodIterProgramOffset; // or 0 if no eod iterator program
     u32 eodIterOffset; // offset to EOD sparse iter or 0 if none
 
+    /** \brief Offset to sparse iter over outfix/suffix NFAs that accept EOD. */
+    u32 eodNfaIterOffset;
+
     u32 lastByteHistoryIterOffset; // if non-zero
 
     /** \brief Minimum number of bytes required to match.
*/ From 09319940bf3fb701c36137ed4f3acb9a0e0ab592 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Jan 2016 13:42:21 +1100 Subject: [PATCH 018/218] roseFlushLastByteHistory: iter state on stack --- src/rose/match.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/rose/match.h b/src/rose/match.h index 86e221831..19a07c9a1 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -268,8 +268,6 @@ void update_depth(struct RoseContext *tctxt, u8 depth) { tctxt->depth = d; } -/* Note: uses the stashed sparse iter state; cannot be called from - * anybody else who is using it */ static rose_inline void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, u64a currEnd, struct RoseContext *tctxt) { @@ -289,13 +287,16 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, DEBUG_PRINTF("flushing\n"); - const struct mmbit_sparse_iter *it - = (const void *)((const char *)t + t->lastByteHistoryIterOffset); + const struct mmbit_sparse_iter *it = + getByOffset(t, t->lastByteHistoryIterOffset); + assert(ISALIGNED(it)); + const u32 numStates = t->rolesWithStateCount; void *role_state = getRoleState(state); - mmbit_sparse_iter_unset(role_state, numStates, it, - scratch->sparse_iter_state); + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + mmbit_sparse_iter_unset(role_state, numStates, it, si_state); } #endif From dd692c5d2bdf8563f705cc7d886e38011bd91ac8 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Jan 2016 13:44:26 +1100 Subject: [PATCH 019/218] roseBlockHasEodWork: iter state on stack --- src/rose/rose.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rose/rose.h b/src/rose/rose.h index 491449888..22df20d61 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -67,9 +67,9 @@ int roseBlockHasEodWork(const struct RoseEngine *t, if (t->eodIterOffset) { u32 idx; const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset); - struct mmbit_sparse_state *s = scratch->sparse_iter_state; + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; if (mmbit_sparse_iter_begin(getRoleState(state), t->rolesWithStateCount, - &idx, it, s) != MMB_INVALID) { + &idx, it, si_state) != MMB_INVALID) { DEBUG_PRINTF("eod iter has states on\n"); return 1; } From 2abc038f1cb5f4449795e541153f601dbc78a462 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Jan 2016 13:46:51 +1100 Subject: [PATCH 020/218] roseCatchUpLeftfixes: iter state on stack --- src/rose/stream.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/rose/stream.c b/src/rose/stream.c index 4096c3564..ab23346a4 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -342,12 +342,13 @@ void roseCatchUpLeftfixes(const struct RoseEngine *t, u8 *state, const u32 arCount = t->activeLeftCount; const struct LeftNfaInfo *left_table = getLeftTable(t); const struct mmbit_sparse_iter *it = getActiveLeftIter(t); - struct mmbit_sparse_state *s = scratch->sparse_iter_state; + + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; u32 idx = 0; - u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, s); + u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state); for (; ri != MMB_INVALID; - ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, s)) { + ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) { const struct LeftNfaInfo *left = left_table + ri; u32 qi = ri + t->leftfixBeginQueue; DEBUG_PRINTF("leftfix %u of %u, maxLag=%u, infix=%d\n", ri, arCount, From 
5fc4289dbe648c4ff6a1e6016e933e361ed38ffc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Jan 2016 16:02:20 +1100 Subject: [PATCH 021/218] roseRunProgram: iter state on stack --- src/rose/program_runtime.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 08dbff1f0..26cfce5f3 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -804,6 +804,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, u64a som = 0; + // Local sparse iterator state for programs that use the SPARSE_ITER_BEGIN + // and SPARSE_ITER_NEXT instructions. + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + assert(*(const u8 *)pc != ROSE_INSTR_END); for (;;) { @@ -997,13 +1001,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, getByOffset(t, ri->iter_offset); assert(ISALIGNED(it)); - struct hs_scratch *scratch = tctxtToScratch(tctxt); - struct mmbit_sparse_state *s = scratch->sparse_iter_state; - u32 idx = 0; u32 i = mmbit_sparse_iter_begin(getRoleState(tctxt->state), t->rolesWithStateCount, &idx, - it, s); + it, si_state); if (i == MMB_INVALID) { DEBUG_PRINTF("no states in sparse iter are on\n"); assert(ri->fail_jump); // must progress @@ -1011,6 +1012,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, continue; } + struct hs_scratch *scratch = tctxtToScratch(tctxt); fatbit_clear(scratch->handled_roles); const u32 *jumps = getByOffset(t, ri->jump_table); @@ -1028,13 +1030,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, getByOffset(t, ri->iter_offset); assert(ISALIGNED(it)); - struct hs_scratch *scratch = tctxtToScratch(tctxt); - struct mmbit_sparse_state *s = scratch->sparse_iter_state; - u32 idx = 0; u32 i = mmbit_sparse_iter_next(getRoleState(tctxt->state), t->rolesWithStateCount, - ri->state, &idx, it, s); + ri->state, &idx, it, si_state); if (i == MMB_INVALID) { DEBUG_PRINTF("no more states in sparse iter are on\n"); assert(ri->fail_jump); // must progress From abb5a82057862bd44a0f91633d91c79dcfe4fc15 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Jan 2016 16:04:19 +1100 Subject: [PATCH 022/218] scratch: remove sparse iter state (now unused) --- src/scratch.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scratch.h b/src/scratch.h index 07e725117..a1efe6d94 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -192,7 +192,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { * location had been writable */ u64a som_set_now_offset; /**< offset at which som_set_now represents */ u32 som_store_count; - struct mmbit_sparse_state sparse_iter_state[MAX_SPARSE_ITER_STATES]; }; static really_inline From 68f6849687c237425dff08c8b00efbeedf06e8c5 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Wed, 9 Dec 2015 12:36:12 +0000 Subject: [PATCH 023/218] Adding AVX2 version of truffle --- src/nfa/truffle.c | 204 ++++++++++++++++++++++++++++----------- src/nfa/truffle.h | 8 ++ src/nfa/truffle_common.h | 149 ++++++++++++++++++++++++++++ 3 files changed, 304 insertions(+), 57 deletions(-) create mode 100644 src/nfa/truffle_common.h diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c index 86dcda638..8863c71a0 100644 --- a/src/nfa/truffle.c +++ b/src/nfa/truffle.c @@ -37,18 +37,9 @@ #include "util/simd_utils.h" #include "util/simd_utils_ssse3.h" -#define shift128r(a, b) _mm_srli_epi64((a), (b)) +#include "truffle_common.h" -static really_inline -const u8 *firstMatch(const u8 *buf, u32 
z) { - if (unlikely(z != 0xffff)) { - u32 pos = ctz32(~z & 0xffff); - assert(pos < 16); - return buf + pos; - } - - return NULL; // no match -} +#if !defined(__AVX2__) static really_inline const u8 *lastMatch(const u8 *buf, u32 z) { @@ -61,25 +52,6 @@ const u8 *lastMatch(const u8 *buf, u32 z) { return NULL; // no match } -static really_inline -u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { - - m128 highconst = _mm_set1_epi8(0x80); - m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); - - // and now do the real work - m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); - m128 t1 = xor128(v, highconst); - m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); - m128 t2 = andnot128(highconst, shift128r(v, 4)); - m128 shuf3 = pshufb(shuf_mask_hi, t2); - m128 tmp = and128(or128(shuf1, shuf2), shuf3); - m128 tmp2 = eq128(tmp, zeroes128()); - u32 z = movemask128(tmp2); - - return z; -} - static really_inline const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v, const u8 *buf) { @@ -94,30 +66,9 @@ const u8 *revBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, return lastMatch(buf, z); } -static -const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 16); - - m128 chars = zeroes128(); - memcpy(&chars, buf, len); - - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - // can't be these bytes in z - u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF; - const u8 *rv = firstMatch(buf, z| mask); - - if (rv) { - return rv; - } else { - return buf_end; - } -} - const u8 *truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("len %zu\n", buf_end - buf); assert(buf && buf_end); @@ -166,8 +117,8 @@ const u8 *truffleExec(m128 shuf_mask_lo_highclear, static const u8 *truffleRevMini(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end) { + m128 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end) { uintptr_t len = buf_end - buf; assert(len < 16); @@ -184,11 +135,9 @@ const u8 *truffleRevMini(m128 shuf_mask_lo_highclear, return buf - 1; } - const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) { - assert(buf && buf_end); assert(buf < buf_end); const u8 *rv; @@ -233,4 +182,145 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, return buf - 1; } +#else + +static really_inline +const u8 *lastMatch(const u8 *buf, u32 z) { + if (unlikely(z != 0xffffffff)) { + u32 pos = clz32(~z); + assert(pos < 32); + return buf + (31 - pos); + } + + return NULL; // no match +} + +static really_inline +const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + m256 v, const u8 *buf) { + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return firstMatch(buf, z); +} + +static really_inline +const u8 *revBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + m256 v, const u8 *buf) { + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return lastMatch(buf, z); +} + +const u8 *truffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("len %zu\n", buf_end - buf); + const m256 wide_clear = set2x128(shuf_mask_lo_highclear); + const m256 wide_set = set2x128(shuf_mask_lo_highset); + + assert(buf && 
buf_end); + assert(buf < buf_end); + const u8 *rv; + + if (buf_end - buf < 32) { + return truffleMini(wide_clear, wide_set, buf, buf_end); + } + + size_t min = (size_t)buf % 32; + assert(buf_end - buf >= 32); + + // Preconditioning: most of the time our buffer won't be aligned. + m256 chars = loadu256(buf); + rv = fwdBlock(wide_clear, wide_set, chars, buf); + if (rv) { + return rv; + } + buf += (32 - min); + + const u8 *last_block = buf_end - 32; + while (buf < last_block) { + m256 lchars = load256(buf); + rv = fwdBlock(wide_clear, wide_set, lchars, buf); + if (rv) { + return rv; + } + buf += 32; + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf_end. + assert(buf <= buf_end && buf >= buf_end - 32); + chars = loadu256(buf_end - 32); + rv = fwdBlock(wide_clear, wide_set, chars, buf_end - 32); + if (rv) { + return rv; + } + return buf_end; +} + +static +const u8 *truffleRevMini(m256 shuf_mask_lo_highclear, + m256 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 32); + + m256 chars = zeroes256(); + memcpy(&chars, buf, len); + + u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF; + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + const u8 *rv = lastMatch(buf, z | mask); + + if (rv) { + return rv; + } + return buf - 1; +} + + +const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + const m256 wide_clear = set2x128(shuf_mask_lo_highclear); + const m256 wide_set = set2x128(shuf_mask_lo_highset); + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + DEBUG_PRINTF("len %zu\n", buf_end - buf); + + if (buf_end - buf < 32) { + return truffleRevMini(wide_clear, wide_set, buf, buf_end); + } + + assert(buf_end - buf >= 32); + + // Preconditioning: most of the time our buffer won't be aligned. + m256 chars = loadu256(buf_end - 32); + rv = revBlock(wide_clear, wide_set, chars, + buf_end - 32); + if (rv) { + return rv; + } + buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0x1f)); + + const u8 *last_block = buf + 32; + while (buf_end > last_block) { + buf_end -= 32; + m256 lchars = load256(buf_end); + rv = revBlock(wide_clear, wide_set, lchars, buf_end); + if (rv) { + return rv; + } + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf_end. + chars = loadu256(buf); + rv = revBlock(wide_clear, wide_set, chars, buf); + if (rv) { + return rv; + } + return buf - 1; +} +#endif diff --git a/src/nfa/truffle.h b/src/nfa/truffle.h index cf5f63462..f67227ad1 100644 --- a/src/nfa/truffle.h +++ b/src/nfa/truffle.h @@ -26,9 +26,17 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/** \file + * \brief Truffle: fully general character class acceleration. + * + * Utilises the SSSE3 pshufb or AVX2 vpshufb shuffle instructions + */ + #ifndef TRUFFLE_H #define TRUFFLE_H + #include "util/simd_types.h" + #ifdef __cplusplus extern "C" { diff --git a/src/nfa/truffle_common.h b/src/nfa/truffle_common.h new file mode 100644 index 000000000..122f65c49 --- /dev/null +++ b/src/nfa/truffle_common.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TRUFFLE_COMMON_H_ +#define TRUFFLE_COMMON_H_ + +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/simd_utils_ssse3.h" + +/* + * Common stuff for all versions of truffle (single, multi and multidouble) + */ +#if !defined(__AVX2__) + +static really_inline +const u8 *firstMatch(const u8 *buf, u32 z) { + if (unlikely(z != 0xffff)) { + u32 pos = ctz32(~z & 0xffff); + assert(pos < 16); + return buf + pos; + } + + return NULL; // no match +} + +#define shift128r(a, b) _mm_srli_epi64((a), (b)) +static really_inline +u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { + + m128 highconst = _mm_set1_epi8(0x80); + m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); + + // and now do the real work + m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); + m128 t1 = xor128(v, highconst); + m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); + m128 t2 = andnot128(highconst, shift128r(v, 4)); + m128 shuf3 = pshufb(shuf_mask_hi, t2); + m128 tmp = and128(or128(shuf1, shuf2), shuf3); + m128 tmp2 = eq128(tmp, zeroes128()); + u32 z = movemask128(tmp2); + + return z; +} + +static +const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 16); + + m128 chars = zeroes128(); + memcpy(&chars, buf, len); + + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + // can't be these bytes in z + u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF; + const u8 *rv = firstMatch(buf, z| mask); + + if (rv) { + return rv; + } else { + return buf_end; + } +} + +#else + +static really_inline +const u8 *firstMatch(const u8 *buf, u32 z) { + if (unlikely(z != 0xffffffff)) { + u32 pos = ctz32(~z); + assert(pos < 32); + return buf + pos; + } + + return NULL; // no match +} + +#define shift256r(a, b) _mm256_srli_epi64((a), (b)) +static really_inline +u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { + + m256 highconst = _mm256_set1_epi8(0x80); + m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201); + + // and now do the real work + m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v); + m256 t1 = xor256(v, highconst); + m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1); + m256 t2 = andnot256(highconst, shift256r(v, 4)); + m256 shuf3 = 
vpshufb(shuf_mask_hi, t2); + m256 tmp = and256(or256(shuf1, shuf2), shuf3); + m256 tmp2 = eq256(tmp, zeroes256()); + u32 z = movemask256(tmp2); + + return z; +} + +static +const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 32); + + m256 chars = zeroes256(); + memcpy(&chars, buf, len); + + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + // can't be these bytes in z + u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF; + const u8 *rv = firstMatch(buf, z | mask); + + if (rv) { + return rv; + } else { + return buf_end; + } +} + +#endif + +#endif /* TRUFFLE_COMMON_H_ */ From 77ff826bbfd28c5dcc2e141ce84f14a0752d6cce Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Wed, 9 Dec 2015 11:11:49 +0000 Subject: [PATCH 024/218] Adding bitmatchers --- CMakeLists.txt | 7 + src/nfa/multiaccel_common.h | 265 +++++++++++++++++++++++++++ src/nfa/multiaccel_doubleshift.h | 149 +++++++++++++++ src/nfa/multiaccel_doubleshiftgrab.h | 152 +++++++++++++++ src/nfa/multiaccel_long.h | 145 +++++++++++++++ src/nfa/multiaccel_longgrab.h | 148 +++++++++++++++ src/nfa/multiaccel_shift.h | 145 +++++++++++++++ src/nfa/multiaccel_shiftgrab.h | 148 +++++++++++++++ src/util/join.h | 6 + 9 files changed, 1165 insertions(+) create mode 100644 src/nfa/multiaccel_common.h create mode 100644 src/nfa/multiaccel_doubleshift.h create mode 100644 src/nfa/multiaccel_doubleshiftgrab.h create mode 100644 src/nfa/multiaccel_long.h create mode 100644 src/nfa/multiaccel_longgrab.h create mode 100644 src/nfa/multiaccel_shift.h create mode 100644 src/nfa/multiaccel_shiftgrab.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 54e5c9f72..714168d8a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -437,6 +437,13 @@ set (hs_exec_SRCS src/nfa/mpv.h src/nfa/mpv.c src/nfa/mpv_internal.h + src/nfa/multiaccel_common.h + src/nfa/multiaccel_doubleshift.h + src/nfa/multiaccel_doubleshiftgrab.h + src/nfa/multiaccel_long.h + src/nfa/multiaccel_longgrab.h + src/nfa/multiaccel_shift.h + src/nfa/multiaccel_shiftgrab.h src/nfa/nfa_api.h src/nfa/nfa_api_dispatch.c src/nfa/nfa_internal.h diff --git a/src/nfa/multiaccel_common.h b/src/nfa/multiaccel_common.h new file mode 100644 index 000000000..1a13c3b6d --- /dev/null +++ b/src/nfa/multiaccel_common.h @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MULTIACCEL_COMMON_H_ +#define MULTIACCEL_COMMON_H_ + +#include "config.h" +#include "ue2common.h" +#include "util/join.h" +#include "util/bitutils.h" + +/* + * When doing shifting, remember that the total number of shifts should be n-1 + */ +#define VARISHIFT(src, dst, len) \ + do { \ + (dst) &= (src) >> (len); \ + } while (0) +#define STATIC_SHIFT1(x) \ + do { \ + (x) &= (x) >> 1; \ + } while (0) +#define STATIC_SHIFT2(x) \ + do { \ + (x) &= (x) >> 2;\ + } while (0) +#define STATIC_SHIFT4(x) \ + do { \ + (x) &= (x) >> 4; \ + } while (0) +#define STATIC_SHIFT8(x) \ + do { \ + (x) &= (x) >> 8; \ + } while (0) +#define SHIFT1(x) \ + do {} while (0) +#define SHIFT2(x) \ + do { \ + STATIC_SHIFT1(x); \ + } while (0) +#define SHIFT3(x) \ + do { \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT1(x); \ + } while (0) +#define SHIFT4(x) \ + do { \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT2(x); \ + } while (0) +#define SHIFT5(x) \ + do { \ + SHIFT4(x); \ + STATIC_SHIFT1(x); \ + } while (0) +#define SHIFT6(x) \ + do { \ + SHIFT4(x); \ + STATIC_SHIFT2(x); \ + } while (0) +#define SHIFT7(x) \ + do { \ + SHIFT4(x); \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT2(x); \ + } while (0) +#define SHIFT8(x) \ + do { \ + SHIFT4(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT9(x) \ + do { \ + SHIFT8(x); \ + STATIC_SHIFT1(x); \ + } while (0) +#define SHIFT10(x) \ + do { \ + SHIFT8(x); \ + STATIC_SHIFT2(x); \ + } while (0) +#define SHIFT11(x) \ + do { \ + SHIFT8(x); \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT2(x); \ + } while (0) +#define SHIFT12(x); \ + do { \ + SHIFT8(x);\ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT13(x); \ + do { \ + SHIFT8(x); \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT14(x) \ + do { \ + SHIFT8(x); \ + STATIC_SHIFT2(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT15(x) \ + do { \ + SHIFT8(x); \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT2(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT16(x) \ + do { \ + SHIFT8(x); \ + STATIC_SHIFT8(x); \ + } while (0) +#define SHIFT17(x) \ + do { \ + SHIFT16(x); \ + STATIC_SHIFT1(x); \ + } while (0) +#define SHIFT18(x) \ + do { \ + SHIFT16(x); \ + STATIC_SHIFT2(x); \ + } while (0) +#define SHIFT19(x) \ + do { \ + SHIFT16(x); \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT2(x); \ + } while (0) +#define SHIFT20(x) \ + do { \ + SHIFT16(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT21(x) \ + do { \ + SHIFT16(x); \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT22(x) \ + do { \ + SHIFT16(x); \ + STATIC_SHIFT2(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT23(x) \ + do { \ + SHIFT16(x); \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT2(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT24(x) \ + do { \ + SHIFT16(x); \ + STATIC_SHIFT8(x); \ + } while (0) +#define SHIFT25(x) \ + do { \ + SHIFT24(x); \ + STATIC_SHIFT1(x); \ + } while (0) +#define SHIFT26(x) \ + do { \ + SHIFT24(x); \ + STATIC_SHIFT2(x); \ + } while (0) +#define SHIFT27(x) \ + do { \ + SHIFT24(x); \ 
+ STATIC_SHIFT1(x); \ + STATIC_SHIFT2(x); \ + } while (0) +#define SHIFT28(x) \ + do { \ + SHIFT24(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT29(x) \ + do { \ + SHIFT24(x); \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT30(x) \ + do { \ + SHIFT24(x); \ + STATIC_SHIFT2(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT31(x) \ + do { \ + SHIFT24(x); \ + STATIC_SHIFT1(x); \ + STATIC_SHIFT2(x); \ + STATIC_SHIFT4(x); \ + } while (0) +#define SHIFT32(x) \ + do { \ + SHIFT24(x); \ + STATIC_SHIFT8(x); \ + } while (0) + +/* + * this function is used by 32-bit multiaccel matchers. 32-bit matchers accept + * a 32-bit integer as a buffer, where low 16 bits is movemask result and + * high 16 bits are "don't care" values. this function is not expected to return + * a result higher than 16. + */ +static really_inline +const u8 *match32(const u8 *buf, const u32 z) { + if (unlikely(z != 0)) { + u32 pos = ctz32(z); + assert(pos < 16); + return buf + pos; + } + return NULL; +} + +/* + * this function is used by 64-bit multiaccel matchers. 64-bit matchers accept + * a 64-bit integer as a buffer, where low 32 bits is movemask result and + * high 32 bits are "don't care" values. this function is not expected to return + * a result higher than 32. + */ +static really_inline +const u8 *match64(const u8 *buf, const u64a z) { + if (unlikely(z != 0)) { + u32 pos = ctz64(z); + assert(pos < 32); + return buf + pos; + } + return NULL; +} + +#endif /* MULTIACCEL_COMMON_H_ */ diff --git a/src/nfa/multiaccel_doubleshift.h b/src/nfa/multiaccel_doubleshift.h new file mode 100644 index 000000000..7ed7534cf --- /dev/null +++ b/src/nfa/multiaccel_doubleshift.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef MULTIACCEL_DOUBLESHIFT_H_ +#define MULTIACCEL_DOUBLESHIFT_H_ + +#include "multiaccel_common.h" + +#define DOUBLESHIFT_MATCH(len, match_t, match_sz) \ + static really_inline \ + const u8 * JOIN4(doubleshiftMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\ + if (unlikely(z)) { \ + match_t tmp = z; \ + z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ + tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \ + VARISHIFT(z, z, len); \ + VARISHIFT(tmp, tmp, len2); \ + VARISHIFT(tmp, z, len); \ + return JOIN(match, match_sz)(buf, z); \ + } \ + return NULL; \ + } + +#define DOUBLESHIFT_MATCH_32_DEF(n) \ + DOUBLESHIFT_MATCH(n, u32, 32) +#define DOUBLESHIFT_MATCH_64_DEF(n) \ + DOUBLESHIFT_MATCH(n, u64a, 64) +#define DOUBLESHIFT_MATCH_DEF(n) \ + DOUBLESHIFT_MATCH_32_DEF(n) \ + DOUBLESHIFT_MATCH_64_DEF(n) + +DOUBLESHIFT_MATCH_DEF(1) +DOUBLESHIFT_MATCH_DEF(2) +DOUBLESHIFT_MATCH_DEF(3) +DOUBLESHIFT_MATCH_DEF(4) +DOUBLESHIFT_MATCH_DEF(5) +DOUBLESHIFT_MATCH_DEF(6) +DOUBLESHIFT_MATCH_DEF(7) +DOUBLESHIFT_MATCH_DEF(8) +DOUBLESHIFT_MATCH_DEF(9) +DOUBLESHIFT_MATCH_DEF(10) +DOUBLESHIFT_MATCH_DEF(11) +DOUBLESHIFT_MATCH_DEF(12) +DOUBLESHIFT_MATCH_DEF(13) +DOUBLESHIFT_MATCH_DEF(14) +DOUBLESHIFT_MATCH_DEF(15) +DOUBLESHIFT_MATCH_64_DEF(16) +DOUBLESHIFT_MATCH_64_DEF(17) +DOUBLESHIFT_MATCH_64_DEF(18) +DOUBLESHIFT_MATCH_64_DEF(19) +DOUBLESHIFT_MATCH_64_DEF(20) +DOUBLESHIFT_MATCH_64_DEF(21) +DOUBLESHIFT_MATCH_64_DEF(22) +DOUBLESHIFT_MATCH_64_DEF(23) +DOUBLESHIFT_MATCH_64_DEF(24) +DOUBLESHIFT_MATCH_64_DEF(25) +DOUBLESHIFT_MATCH_64_DEF(26) +DOUBLESHIFT_MATCH_64_DEF(27) +DOUBLESHIFT_MATCH_64_DEF(28) +DOUBLESHIFT_MATCH_64_DEF(29) +DOUBLESHIFT_MATCH_64_DEF(30) +DOUBLESHIFT_MATCH_64_DEF(31) + +static +const UNUSED u8 * (*doubleshift_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) = +{ +// skip the first + 0, + &doubleshiftMatch_32_1, + &doubleshiftMatch_32_2, + &doubleshiftMatch_32_3, + &doubleshiftMatch_32_4, + &doubleshiftMatch_32_5, + &doubleshiftMatch_32_6, + &doubleshiftMatch_32_7, + &doubleshiftMatch_32_8, + &doubleshiftMatch_32_9, + &doubleshiftMatch_32_10, + &doubleshiftMatch_32_11, + &doubleshiftMatch_32_12, + &doubleshiftMatch_32_13, + &doubleshiftMatch_32_14, + &doubleshiftMatch_32_15, +}; + +static +const UNUSED u8 * (*doubleshift_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) = +{ +// skip the first + 0, + &doubleshiftMatch_64_1, + &doubleshiftMatch_64_2, + &doubleshiftMatch_64_3, + &doubleshiftMatch_64_4, + &doubleshiftMatch_64_5, + &doubleshiftMatch_64_6, + &doubleshiftMatch_64_7, + &doubleshiftMatch_64_8, + &doubleshiftMatch_64_9, + &doubleshiftMatch_64_10, + &doubleshiftMatch_64_11, + &doubleshiftMatch_64_12, + &doubleshiftMatch_64_13, + &doubleshiftMatch_64_14, + &doubleshiftMatch_64_15, + &doubleshiftMatch_64_16, + &doubleshiftMatch_64_17, + &doubleshiftMatch_64_18, + &doubleshiftMatch_64_19, + &doubleshiftMatch_64_20, + &doubleshiftMatch_64_21, + &doubleshiftMatch_64_22, + &doubleshiftMatch_64_23, + &doubleshiftMatch_64_24, + &doubleshiftMatch_64_25, + &doubleshiftMatch_64_26, + &doubleshiftMatch_64_27, + &doubleshiftMatch_64_28, + &doubleshiftMatch_64_29, + &doubleshiftMatch_64_30, + &doubleshiftMatch_64_31, +}; + +#endif /* MULTIACCEL_DOUBLESHIFT_H_ */ diff --git a/src/nfa/multiaccel_doubleshiftgrab.h b/src/nfa/multiaccel_doubleshiftgrab.h new file mode 100644 index 000000000..51955b4a6 --- /dev/null +++ b/src/nfa/multiaccel_doubleshiftgrab.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and 
binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MULTIACCEL_DOUBLESHIFTGRAB_H_ +#define MULTIACCEL_DOUBLESHIFTGRAB_H_ + +#include "multiaccel_common.h" + +#define DOUBLESHIFTGRAB_MATCH(len, match_t, match_sz) \ + static really_inline \ + const u8 * JOIN4(doubleshiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\ + if (unlikely(z)) { \ + match_t neg = ~z; \ + match_t tmp = z; \ + z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ + tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \ + neg |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ + VARISHIFT(z, z, len); \ + VARISHIFT(tmp, tmp, len2); \ + VARISHIFT(neg, z, 1); \ + VARISHIFT(tmp, z, len); \ + return JOIN(match, match_sz)(buf, z); \ + } \ + return NULL; \ + } + +#define DOUBLESHIFTGRAB_MATCH_32_DEF(n) \ + DOUBLESHIFTGRAB_MATCH(n, u32, 32) +#define DOUBLESHIFTGRAB_MATCH_64_DEF(n) \ + DOUBLESHIFTGRAB_MATCH(n, u64a, 64) +#define DOUBLESHIFTGRAB_MATCH_DEF(n) \ + DOUBLESHIFTGRAB_MATCH_32_DEF(n) \ + DOUBLESHIFTGRAB_MATCH_64_DEF(n) + +DOUBLESHIFTGRAB_MATCH_DEF(1) +DOUBLESHIFTGRAB_MATCH_DEF(2) +DOUBLESHIFTGRAB_MATCH_DEF(3) +DOUBLESHIFTGRAB_MATCH_DEF(4) +DOUBLESHIFTGRAB_MATCH_DEF(5) +DOUBLESHIFTGRAB_MATCH_DEF(6) +DOUBLESHIFTGRAB_MATCH_DEF(7) +DOUBLESHIFTGRAB_MATCH_DEF(8) +DOUBLESHIFTGRAB_MATCH_DEF(9) +DOUBLESHIFTGRAB_MATCH_DEF(10) +DOUBLESHIFTGRAB_MATCH_DEF(11) +DOUBLESHIFTGRAB_MATCH_DEF(12) +DOUBLESHIFTGRAB_MATCH_DEF(13) +DOUBLESHIFTGRAB_MATCH_DEF(14) +DOUBLESHIFTGRAB_MATCH_DEF(15) +DOUBLESHIFTGRAB_MATCH_64_DEF(16) +DOUBLESHIFTGRAB_MATCH_64_DEF(17) +DOUBLESHIFTGRAB_MATCH_64_DEF(18) +DOUBLESHIFTGRAB_MATCH_64_DEF(19) +DOUBLESHIFTGRAB_MATCH_64_DEF(20) +DOUBLESHIFTGRAB_MATCH_64_DEF(21) +DOUBLESHIFTGRAB_MATCH_64_DEF(22) +DOUBLESHIFTGRAB_MATCH_64_DEF(23) +DOUBLESHIFTGRAB_MATCH_64_DEF(24) +DOUBLESHIFTGRAB_MATCH_64_DEF(25) +DOUBLESHIFTGRAB_MATCH_64_DEF(26) +DOUBLESHIFTGRAB_MATCH_64_DEF(27) +DOUBLESHIFTGRAB_MATCH_64_DEF(28) +DOUBLESHIFTGRAB_MATCH_64_DEF(29) +DOUBLESHIFTGRAB_MATCH_64_DEF(30) +DOUBLESHIFTGRAB_MATCH_64_DEF(31) + +static +const UNUSED u8 * (*doubleshiftgrab_match_funcs_32[])(const u8 *buf, u32 
z, u32 len2) = +{ +// skip the first + 0, + &doubleshiftgrabMatch_32_1, + &doubleshiftgrabMatch_32_2, + &doubleshiftgrabMatch_32_3, + &doubleshiftgrabMatch_32_4, + &doubleshiftgrabMatch_32_5, + &doubleshiftgrabMatch_32_6, + &doubleshiftgrabMatch_32_7, + &doubleshiftgrabMatch_32_8, + &doubleshiftgrabMatch_32_9, + &doubleshiftgrabMatch_32_10, + &doubleshiftgrabMatch_32_11, + &doubleshiftgrabMatch_32_12, + &doubleshiftgrabMatch_32_13, + &doubleshiftgrabMatch_32_14, + &doubleshiftgrabMatch_32_15, +}; + +static +const UNUSED u8 * (*doubleshiftgrab_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) = +{ +// skip the first + 0, + &doubleshiftgrabMatch_64_1, + &doubleshiftgrabMatch_64_2, + &doubleshiftgrabMatch_64_3, + &doubleshiftgrabMatch_64_4, + &doubleshiftgrabMatch_64_5, + &doubleshiftgrabMatch_64_6, + &doubleshiftgrabMatch_64_7, + &doubleshiftgrabMatch_64_8, + &doubleshiftgrabMatch_64_9, + &doubleshiftgrabMatch_64_10, + &doubleshiftgrabMatch_64_11, + &doubleshiftgrabMatch_64_12, + &doubleshiftgrabMatch_64_13, + &doubleshiftgrabMatch_64_14, + &doubleshiftgrabMatch_64_15, + &doubleshiftgrabMatch_64_16, + &doubleshiftgrabMatch_64_17, + &doubleshiftgrabMatch_64_18, + &doubleshiftgrabMatch_64_19, + &doubleshiftgrabMatch_64_20, + &doubleshiftgrabMatch_64_21, + &doubleshiftgrabMatch_64_22, + &doubleshiftgrabMatch_64_23, + &doubleshiftgrabMatch_64_24, + &doubleshiftgrabMatch_64_25, + &doubleshiftgrabMatch_64_26, + &doubleshiftgrabMatch_64_27, + &doubleshiftgrabMatch_64_28, + &doubleshiftgrabMatch_64_29, + &doubleshiftgrabMatch_64_30, + &doubleshiftgrabMatch_64_31, +}; + +#endif /* MULTIACCEL_DOUBLESHIFTGRAB_H_ */ diff --git a/src/nfa/multiaccel_long.h b/src/nfa/multiaccel_long.h new file mode 100644 index 000000000..515f0bc22 --- /dev/null +++ b/src/nfa/multiaccel_long.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
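Assuming VARISHIFT(src, dst, n) in the macros above expands to dst &= src >> n (consistent with how every matcher in this patch uses it), the doubleshift test keeps bit i only when the character class matches at positions i, i + len and i + len + len2. A worked scalar sketch, with the block-edge fill omitted for clarity:

#include <stdint.h>
#include <stdio.h>

/* Assumed expansion of VARISHIFT(src, dst, n): dst &= src >> n. */
#define VARISHIFT(src, dst, n) ((dst) &= (src) >> (n))

int main(void) {
    /* Byte i "matches" iff bit i is set. With len = 2 and len2 = 3 we
     * want positions i, i + 2 and i + 5 all matching. */
    const unsigned len = 2, len2 = 3;
    uint32_t orig = (1u << 3) | (1u << 5) | (1u << 8) | (1u << 9);
    uint32_t z = orig, tmp = orig;
    VARISHIFT(z, z, len);      /* i and i + len match                  */
    VARISHIFT(tmp, tmp, len2); /* i and i + len2 match                 */
    VARISHIFT(tmp, z, len);    /* hence i + len and i + len + len2 too */
    printf("0x%x\n", (unsigned)z);  /* 0x8: bit 3 (matches at 3, 5, 8) */
    return 0;
}

The (1 << len) - 1 style fills OR'd above the halfway bit in the real macro play the same role as in the single-shift matchers: candidates whose partner positions fall past the end of the block are kept rather than lost.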
+ */ + +#ifndef MULTIACCEL_LONG_H_ +#define MULTIACCEL_LONG_H_ + +#include "multiaccel_common.h" + +#define LONG_MATCH(len, match_t, match_sz) \ + static really_inline \ + const u8 * JOIN4(longMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \ + if (unlikely(z)) { \ + z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \ + JOIN(SHIFT, len)(z); \ + return JOIN(match, match_sz)(buf, z); \ + } \ + return NULL; \ + } + +#define LONG_MATCH_32_DEF(n) \ + LONG_MATCH(n, u32, 32) +#define LONG_MATCH_64_DEF(n) \ + LONG_MATCH(n, u64a, 64) +#define LONG_MATCH_DEF(n) \ + LONG_MATCH_32_DEF(n) \ + LONG_MATCH_64_DEF(n) + +LONG_MATCH_DEF(1) +LONG_MATCH_DEF(2) +LONG_MATCH_DEF(3) +LONG_MATCH_DEF(4) +LONG_MATCH_DEF(5) +LONG_MATCH_DEF(6) +LONG_MATCH_DEF(7) +LONG_MATCH_DEF(8) +LONG_MATCH_DEF(9) +LONG_MATCH_DEF(10) +LONG_MATCH_DEF(11) +LONG_MATCH_DEF(12) +LONG_MATCH_DEF(13) +LONG_MATCH_DEF(14) +LONG_MATCH_DEF(15) +LONG_MATCH_64_DEF(16) +LONG_MATCH_64_DEF(17) +LONG_MATCH_64_DEF(18) +LONG_MATCH_64_DEF(19) +LONG_MATCH_64_DEF(20) +LONG_MATCH_64_DEF(21) +LONG_MATCH_64_DEF(22) +LONG_MATCH_64_DEF(23) +LONG_MATCH_64_DEF(24) +LONG_MATCH_64_DEF(25) +LONG_MATCH_64_DEF(26) +LONG_MATCH_64_DEF(27) +LONG_MATCH_64_DEF(28) +LONG_MATCH_64_DEF(29) +LONG_MATCH_64_DEF(30) +LONG_MATCH_64_DEF(31) + +static +const UNUSED u8 *(*long_match_funcs_32[])(const u8 *buf, u32 z) = +{ + // skip the first three + 0, + &longMatch_32_1, + &longMatch_32_2, + &longMatch_32_3, + &longMatch_32_4, + &longMatch_32_5, + &longMatch_32_6, + &longMatch_32_7, + &longMatch_32_8, + &longMatch_32_9, + &longMatch_32_10, + &longMatch_32_11, + &longMatch_32_12, + &longMatch_32_13, + &longMatch_32_14, + &longMatch_32_15, + }; + +static +const UNUSED u8 *(*long_match_funcs_64[])(const u8 *buf, u64a z) = +{ +// skip the first three + 0, + &longMatch_64_1, + &longMatch_64_2, + &longMatch_64_3, + &longMatch_64_4, + &longMatch_64_5, + &longMatch_64_6, + &longMatch_64_7, + &longMatch_64_8, + &longMatch_64_9, + &longMatch_64_10, + &longMatch_64_11, + &longMatch_64_12, + &longMatch_64_13, + &longMatch_64_14, + &longMatch_64_15, + &longMatch_64_16, + &longMatch_64_17, + &longMatch_64_18, + &longMatch_64_19, + &longMatch_64_20, + &longMatch_64_21, + &longMatch_64_22, + &longMatch_64_23, + &longMatch_64_24, + &longMatch_64_25, + &longMatch_64_26, + &longMatch_64_27, + &longMatch_64_28, + &longMatch_64_29, + &longMatch_64_30, + &longMatch_64_31, +}; + +#endif /* MULTIACCEL_LONG_H_ */ diff --git a/src/nfa/multiaccel_longgrab.h b/src/nfa/multiaccel_longgrab.h new file mode 100644 index 000000000..09daaf82a --- /dev/null +++ b/src/nfa/multiaccel_longgrab.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
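The JOIN(SHIFT, len)(z) step in LONG_MATCH above unrolls a classic run-detection fold: assuming STATIC_SHIFTk(x) stands for x &= x >> k, the composed shifts clear every bit that does not begin a sufficiently long run of set bits, and the SHIFT24..SHIFT32 definitions earlier appear to compose small static shifts so larger totals cost only a few operations. A naive loop version of the same fold:

#include <stdint.h>
#include <stdio.h>

/* One-shift-at-a-time equivalent of the composed STATIC_SHIFT* steps:
 * after n rounds, bit i survives only if bits i .. i+n are all set,
 * i.e. i begins a run of at least n + 1 consecutive matches. */
static uint32_t run_fold(uint32_t z, unsigned n) {
    for (unsigned k = 0; k < n; k++) {
        z &= z >> 1;
    }
    return z;
}

int main(void) {
    uint32_t z = 0x1DC;   /* set bits 2..4 and 6..8 */
    printf("0x%x\n", (unsigned)run_fold(z, 2));  /* 0x44: bits 2 and 6 */
    return 0;
}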
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MULTIACCEL_LONGGRAB_H_ +#define MULTIACCEL_LONGGRAB_H_ + +#include "multiaccel_common.h" + +#define LONGGRAB_MATCH(len, match_t, match_sz) \ + static really_inline \ + const u8 * JOIN4(longgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \ + if (unlikely(z)) { \ + match_t tmp = ~z; \ + tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ + z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \ + JOIN(SHIFT, len)(z); \ + VARISHIFT(tmp, z, len); \ + return JOIN(match, match_sz)(buf, z); \ + } \ + return NULL; \ + } + +#define LONGGRAB_MATCH_32_DEF(n) \ + LONGGRAB_MATCH(n, u32, 32) +#define LONGGRAB_MATCH_64_DEF(n) \ + LONGGRAB_MATCH(n, u64a, 64) +#define LONGGRAB_MATCH_DEF(n) \ + LONGGRAB_MATCH_32_DEF(n) \ + LONGGRAB_MATCH_64_DEF(n) + +LONGGRAB_MATCH_DEF(1) +LONGGRAB_MATCH_DEF(2) +LONGGRAB_MATCH_DEF(3) +LONGGRAB_MATCH_DEF(4) +LONGGRAB_MATCH_DEF(5) +LONGGRAB_MATCH_DEF(6) +LONGGRAB_MATCH_DEF(7) +LONGGRAB_MATCH_DEF(8) +LONGGRAB_MATCH_DEF(9) +LONGGRAB_MATCH_DEF(10) +LONGGRAB_MATCH_DEF(11) +LONGGRAB_MATCH_DEF(12) +LONGGRAB_MATCH_DEF(13) +LONGGRAB_MATCH_DEF(14) +LONGGRAB_MATCH_DEF(15) +LONGGRAB_MATCH_64_DEF(16) +LONGGRAB_MATCH_64_DEF(17) +LONGGRAB_MATCH_64_DEF(18) +LONGGRAB_MATCH_64_DEF(19) +LONGGRAB_MATCH_64_DEF(20) +LONGGRAB_MATCH_64_DEF(21) +LONGGRAB_MATCH_64_DEF(22) +LONGGRAB_MATCH_64_DEF(23) +LONGGRAB_MATCH_64_DEF(24) +LONGGRAB_MATCH_64_DEF(25) +LONGGRAB_MATCH_64_DEF(26) +LONGGRAB_MATCH_64_DEF(27) +LONGGRAB_MATCH_64_DEF(28) +LONGGRAB_MATCH_64_DEF(29) +LONGGRAB_MATCH_64_DEF(30) +LONGGRAB_MATCH_64_DEF(31) + +static +const UNUSED u8 *(*longgrab_match_funcs_32[])(const u8 *buf, u32 z) = +{ +// skip the first three + 0, + &longgrabMatch_32_1, + &longgrabMatch_32_2, + &longgrabMatch_32_3, + &longgrabMatch_32_4, + &longgrabMatch_32_5, + &longgrabMatch_32_6, + &longgrabMatch_32_7, + &longgrabMatch_32_8, + &longgrabMatch_32_9, + &longgrabMatch_32_10, + &longgrabMatch_32_11, + &longgrabMatch_32_12, + &longgrabMatch_32_13, + &longgrabMatch_32_14, + &longgrabMatch_32_15, + }; + +static +const UNUSED u8 *(*longgrab_match_funcs_64[])(const u8 *buf, u64a z) = +{ +// skip the first three + 0, + &longgrabMatch_64_1, + &longgrabMatch_64_2, + &longgrabMatch_64_3, + &longgrabMatch_64_4, + &longgrabMatch_64_5, + &longgrabMatch_64_6, + &longgrabMatch_64_7, + &longgrabMatch_64_8, + &longgrabMatch_64_9, + &longgrabMatch_64_10, + &longgrabMatch_64_11, + &longgrabMatch_64_12, + &longgrabMatch_64_13, + &longgrabMatch_64_14, + &longgrabMatch_64_15, + &longgrabMatch_64_16, + &longgrabMatch_64_17, + &longgrabMatch_64_18, + &longgrabMatch_64_19, + &longgrabMatch_64_20, + &longgrabMatch_64_21, + &longgrabMatch_64_22, + &longgrabMatch_64_23, + &longgrabMatch_64_24, + &longgrabMatch_64_25, + 
&longgrabMatch_64_26, + &longgrabMatch_64_27, + &longgrabMatch_64_28, + &longgrabMatch_64_29, + &longgrabMatch_64_30, + &longgrabMatch_64_31, +}; + +#endif /* MULTIACCEL_LONGGRAB_H_ */ diff --git a/src/nfa/multiaccel_shift.h b/src/nfa/multiaccel_shift.h new file mode 100644 index 000000000..fd362a8b6 --- /dev/null +++ b/src/nfa/multiaccel_shift.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
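The "grab" variants, such as LONGGRAB_MATCH above, add a negative condition: the run must be followed by a byte that does not match, which is what the inverted mask tmp = ~z feeds into the final shift. A scalar sketch for len = 2, again assuming VARISHIFT(src, dst, n) means dst &= src >> n and standing in for the JOIN(SHIFT, len) step with an explicit run test:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    const unsigned len = 2;
    /* byte i matches iff bit i is set; matches at 1..4 and 7..8 */
    uint32_t z = 0x19E;
    uint32_t tmp = ~z;              /* the "grab": mismatch mask      */
    uint32_t r = z & (z >> 1);      /* run of two starting at i       */
    r &= tmp >> len;                /* require a mismatch at i + len  */
    printf("0x%x\n", (unsigned)r);  /* 0x88: bits 3 and 7             */
    return 0;
}

Bits 1 and 2 are cleared even though runs of two start there, because the run continues past them; only positions whose byte at i + len fails to match survive.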
+ */ + +#ifndef MULTIACCEL_SHIFT_H_ +#define MULTIACCEL_SHIFT_H_ + +#include "multiaccel_common.h" + +#define SHIFT_MATCH(len, match_t, match_sz) \ + static really_inline \ + const u8 * JOIN4(shiftMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\ + if (unlikely(z)) { \ + z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ + VARISHIFT(z, z, len); \ + return JOIN(match, match_sz)(buf, z); \ + } \ + return NULL; \ + } + +#define SHIFT_MATCH_32_DEF(n) \ + SHIFT_MATCH(n, u32, 32) +#define SHIFT_MATCH_64_DEF(n) \ + SHIFT_MATCH(n, u64a, 64) +#define SHIFT_MATCH_DEF(n) \ + SHIFT_MATCH_32_DEF(n) \ + SHIFT_MATCH_64_DEF(n) + +SHIFT_MATCH_DEF(1) +SHIFT_MATCH_DEF(2) +SHIFT_MATCH_DEF(3) +SHIFT_MATCH_DEF(4) +SHIFT_MATCH_DEF(5) +SHIFT_MATCH_DEF(6) +SHIFT_MATCH_DEF(7) +SHIFT_MATCH_DEF(8) +SHIFT_MATCH_DEF(9) +SHIFT_MATCH_DEF(10) +SHIFT_MATCH_DEF(11) +SHIFT_MATCH_DEF(12) +SHIFT_MATCH_DEF(13) +SHIFT_MATCH_DEF(14) +SHIFT_MATCH_DEF(15) +SHIFT_MATCH_64_DEF(16) +SHIFT_MATCH_64_DEF(17) +SHIFT_MATCH_64_DEF(18) +SHIFT_MATCH_64_DEF(19) +SHIFT_MATCH_64_DEF(20) +SHIFT_MATCH_64_DEF(21) +SHIFT_MATCH_64_DEF(22) +SHIFT_MATCH_64_DEF(23) +SHIFT_MATCH_64_DEF(24) +SHIFT_MATCH_64_DEF(25) +SHIFT_MATCH_64_DEF(26) +SHIFT_MATCH_64_DEF(27) +SHIFT_MATCH_64_DEF(28) +SHIFT_MATCH_64_DEF(29) +SHIFT_MATCH_64_DEF(30) +SHIFT_MATCH_64_DEF(31) + +static +const UNUSED u8 * (*shift_match_funcs_32[])(const u8 *buf, u32 z) = +{ +// skip the first + 0, + &shiftMatch_32_1, + &shiftMatch_32_2, + &shiftMatch_32_3, + &shiftMatch_32_4, + &shiftMatch_32_5, + &shiftMatch_32_6, + &shiftMatch_32_7, + &shiftMatch_32_8, + &shiftMatch_32_9, + &shiftMatch_32_10, + &shiftMatch_32_11, + &shiftMatch_32_12, + &shiftMatch_32_13, + &shiftMatch_32_14, + &shiftMatch_32_15, +}; + +static +const UNUSED u8 * (*shift_match_funcs_64[])(const u8 *buf, u64a z) = +{ +// skip the first + 0, + &shiftMatch_64_1, + &shiftMatch_64_2, + &shiftMatch_64_3, + &shiftMatch_64_4, + &shiftMatch_64_5, + &shiftMatch_64_6, + &shiftMatch_64_7, + &shiftMatch_64_8, + &shiftMatch_64_9, + &shiftMatch_64_10, + &shiftMatch_64_11, + &shiftMatch_64_12, + &shiftMatch_64_13, + &shiftMatch_64_14, + &shiftMatch_64_15, + &shiftMatch_64_16, + &shiftMatch_64_17, + &shiftMatch_64_18, + &shiftMatch_64_19, + &shiftMatch_64_20, + &shiftMatch_64_21, + &shiftMatch_64_22, + &shiftMatch_64_23, + &shiftMatch_64_24, + &shiftMatch_64_25, + &shiftMatch_64_26, + &shiftMatch_64_27, + &shiftMatch_64_28, + &shiftMatch_64_29, + &shiftMatch_64_30, + &shiftMatch_64_31, +}; + +#endif /* MULTIACCEL_SHIFT_H_ */ diff --git a/src/nfa/multiaccel_shiftgrab.h b/src/nfa/multiaccel_shiftgrab.h new file mode 100644 index 000000000..032ed0865 --- /dev/null +++ b/src/nfa/multiaccel_shiftgrab.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
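SHIFT_MATCH above is the simplest member of the family: with the same assumed VARISHIFT expansion, it keeps position i only when positions i and i + len both match. The OR of (1 << len) - 1 above bit 16 pads the "don't care" half with phantom matches, so a candidate whose partner byte lies past the end of the current 16-byte block is still reported and can be re-examined, rather than silently dropped. A sketch of that boundary case:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    const unsigned len = 3;
    /* Low 16 bits: movemask of a 16-byte block; high 16: don't care.
     * A lone match at position 14 has its partner at 17, outside the
     * block, so phantom matches are seeded above bit 16. */
    uint32_t z = 1u << 14;
    z |= ((1u << len) - 1) << 16;   /* phantom matches at 16..18        */
    z &= z >> len;                  /* keep i only if i + len matches   */
    printf("0x%x\n", (unsigned)z);  /* 0x4000: bit 14 survives          */
    return 0;
}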
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MULTIACCEL_SHIFTGRAB_H_ +#define MULTIACCEL_SHIFTGRAB_H_ + +#include "multiaccel_common.h" + +#define SHIFTGRAB_MATCH(len, match_t, match_sz) \ + static really_inline \ + const u8 * JOIN4(shiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\ + if (unlikely(z)) { \ + match_t tmp = ~z; \ + z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ + tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ + VARISHIFT(z, z, len); \ + VARISHIFT(tmp, z, 1); \ + return JOIN(match, match_sz)(buf, z); \ + } \ + return NULL; \ + } + +#define SHIFTGRAB_MATCH_32_DEF(n) \ + SHIFTGRAB_MATCH(n, u32, 32) +#define SHIFTGRAB_MATCH_64_DEF(n) \ + SHIFTGRAB_MATCH(n, u64a, 64) +#define SHIFTGRAB_MATCH_DEF(n) \ + SHIFTGRAB_MATCH_32_DEF(n) \ + SHIFTGRAB_MATCH_64_DEF(n) + +SHIFTGRAB_MATCH_DEF(1) +SHIFTGRAB_MATCH_DEF(2) +SHIFTGRAB_MATCH_DEF(3) +SHIFTGRAB_MATCH_DEF(4) +SHIFTGRAB_MATCH_DEF(5) +SHIFTGRAB_MATCH_DEF(6) +SHIFTGRAB_MATCH_DEF(7) +SHIFTGRAB_MATCH_DEF(8) +SHIFTGRAB_MATCH_DEF(9) +SHIFTGRAB_MATCH_DEF(10) +SHIFTGRAB_MATCH_DEF(11) +SHIFTGRAB_MATCH_DEF(12) +SHIFTGRAB_MATCH_DEF(13) +SHIFTGRAB_MATCH_DEF(14) +SHIFTGRAB_MATCH_DEF(15) +SHIFTGRAB_MATCH_64_DEF(16) +SHIFTGRAB_MATCH_64_DEF(17) +SHIFTGRAB_MATCH_64_DEF(18) +SHIFTGRAB_MATCH_64_DEF(19) +SHIFTGRAB_MATCH_64_DEF(20) +SHIFTGRAB_MATCH_64_DEF(21) +SHIFTGRAB_MATCH_64_DEF(22) +SHIFTGRAB_MATCH_64_DEF(23) +SHIFTGRAB_MATCH_64_DEF(24) +SHIFTGRAB_MATCH_64_DEF(25) +SHIFTGRAB_MATCH_64_DEF(26) +SHIFTGRAB_MATCH_64_DEF(27) +SHIFTGRAB_MATCH_64_DEF(28) +SHIFTGRAB_MATCH_64_DEF(29) +SHIFTGRAB_MATCH_64_DEF(30) +SHIFTGRAB_MATCH_64_DEF(31) + +static +const UNUSED u8 * (*shiftgrab_match_funcs_32[])(const u8 *buf, u32 z) = +{ +// skip the first + 0, + &shiftgrabMatch_32_1, + &shiftgrabMatch_32_2, + &shiftgrabMatch_32_3, + &shiftgrabMatch_32_4, + &shiftgrabMatch_32_5, + &shiftgrabMatch_32_6, + &shiftgrabMatch_32_7, + &shiftgrabMatch_32_8, + &shiftgrabMatch_32_9, + &shiftgrabMatch_32_10, + &shiftgrabMatch_32_11, + &shiftgrabMatch_32_12, + &shiftgrabMatch_32_13, + &shiftgrabMatch_32_14, + &shiftgrabMatch_32_15, +}; + +static +const UNUSED u8 * (*shiftgrab_match_funcs_64[])(const u8 *buf, u64a z) = + { +// skip the first + 0, + &shiftgrabMatch_64_1, + &shiftgrabMatch_64_2, + &shiftgrabMatch_64_3, + &shiftgrabMatch_64_4, + &shiftgrabMatch_64_5, + &shiftgrabMatch_64_6, + &shiftgrabMatch_64_7, + &shiftgrabMatch_64_8, + &shiftgrabMatch_64_9, + &shiftgrabMatch_64_10, + &shiftgrabMatch_64_11, + &shiftgrabMatch_64_12, + &shiftgrabMatch_64_13, + &shiftgrabMatch_64_14, + &shiftgrabMatch_64_15, + &shiftgrabMatch_64_16, + &shiftgrabMatch_64_17, + &shiftgrabMatch_64_18, + &shiftgrabMatch_64_19, + &shiftgrabMatch_64_20, + &shiftgrabMatch_64_21, + &shiftgrabMatch_64_22, + &shiftgrabMatch_64_23, 
+ &shiftgrabMatch_64_24, + &shiftgrabMatch_64_25, + &shiftgrabMatch_64_26, + &shiftgrabMatch_64_27, + &shiftgrabMatch_64_28, + &shiftgrabMatch_64_29, + &shiftgrabMatch_64_30, + &shiftgrabMatch_64_31, +}; + +#endif /* MULTIACCEL_SHIFTGRAB_H_ */ diff --git a/src/util/join.h b/src/util/join.h index 0bc79d9b3..7d5a30c39 100644 --- a/src/util/join.h +++ b/src/util/join.h @@ -31,4 +31,10 @@ #define JOIN(x, y) JOIN_AGAIN(x, y) #define JOIN_AGAIN(x, y) x ## y +#define JOIN3(x, y, z) JOIN_AGAIN3(x, y, z) +#define JOIN_AGAIN3(x, y, z) x ## y ## z + +#define JOIN4(w, x, y, z) JOIN_AGAIN4(w, x, y, z) +#define JOIN_AGAIN4(w, x, y, z) w ## x ## y ## z + #endif From dd2ec6bdaca08cb10f3d134f1862401811157543 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Wed, 9 Dec 2015 11:46:19 +0000 Subject: [PATCH 025/218] Multibyte vermicelli runtime --- CMakeLists.txt | 4 + src/nfa/accel.c | 103 ++++++++ src/nfa/accel.h | 28 +- src/nfa/accel_dump.cpp | 41 +++ src/nfa/limex_accel.c | 61 +++++ src/nfa/multivermicelli.c | 108 ++++++++ src/nfa/multivermicelli.h | 62 +++++ src/nfa/multivermicelli_avx2.h | 283 +++++++++++++++++++++ src/nfa/multivermicelli_sse.h | 452 +++++++++++++++++++++++++++++++++ 9 files changed, 1141 insertions(+), 1 deletion(-) create mode 100644 src/nfa/multivermicelli.c create mode 100644 src/nfa/multivermicelli.h create mode 100644 src/nfa/multivermicelli_avx2.h create mode 100644 src/nfa/multivermicelli_sse.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 714168d8a..0848f5501 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -444,6 +444,10 @@ set (hs_exec_SRCS src/nfa/multiaccel_longgrab.h src/nfa/multiaccel_shift.h src/nfa/multiaccel_shiftgrab.h + src/nfa/multivermicelli.c + src/nfa/multivermicelli.h + src/nfa/multivermicelli_sse.h + src/nfa/multivermicelli_avx2.h src/nfa/nfa_api.h src/nfa/nfa_api_dispatch.c src/nfa/nfa_internal.h diff --git a/src/nfa/accel.c b/src/nfa/accel.c index af5e9610a..43ecd84f0 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -30,6 +30,7 @@ #include "shufti.h" #include "truffle.h" #include "vermicelli.h" +#include "multivermicelli.h" #include "ue2common.h" const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { @@ -117,6 +118,108 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { rv = c_end; break; + /* multibyte matchers */ + case ACCEL_MLVERM: + DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); + break; + case ACCEL_MLVERM_NOCASE: + DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); + break; + case ACCEL_MLGVERM: + DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); + break; + case ACCEL_MLGVERM_NOCASE: + DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); + break; + case ACCEL_MSVERM: + DEBUG_PRINTF("accel msverm %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); + break; + case ACCEL_MSVERM_NOCASE: + DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shift_vermicelliExec(accel->mverm.c, 1, 
c, c_end, accel->mverm.len); + break; + case ACCEL_MSGVERM: + DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); + break; + case ACCEL_MSGVERM_NOCASE: + DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); + break; + case ACCEL_MDSVERM: + DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end, + accel->mdverm.len1, accel->mdverm.len2); + break; + case ACCEL_MDSVERM_NOCASE: + DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end, + accel->mdverm.len1, accel->mdverm.len2); + break; + case ACCEL_MDSGVERM: + DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end, + accel->mdverm.len1, accel->mdverm.len2); + break; + case ACCEL_MDSGVERM_NOCASE: + DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end, + accel->mdverm.len1, accel->mdverm.len2); + break; + default: assert(!"not here"); return c; diff --git a/src/nfa/accel.h b/src/nfa/accel.h index 2c1f223a4..cc64d5870 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -60,7 +60,20 @@ enum AccelType { ACCEL_SHUFTI, ACCEL_DSHUFTI, ACCEL_TRUFFLE, - ACCEL_RED_TAPE + ACCEL_RED_TAPE, + /* multibyte vermicellis */ + ACCEL_MLVERM, + ACCEL_MLVERM_NOCASE, + ACCEL_MLGVERM, + ACCEL_MLGVERM_NOCASE, + ACCEL_MSVERM, + ACCEL_MSVERM_NOCASE, + ACCEL_MSGVERM, + ACCEL_MSGVERM_NOCASE, + ACCEL_MDSVERM, + ACCEL_MDSVERM_NOCASE, + ACCEL_MDSGVERM, + ACCEL_MDSGVERM_NOCASE, }; /** \brief Structure for accel framework. 
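A sketch of how a caller might populate the multibyte fields of the AccelAux union below; the field and enum names are taken from this patch, but the helper itself is illustrative rather than code from the tree, and it assumes the in-tree headers:

#include <string.h>
#include "nfa/accel.h"   /* in-tree header; sketch assumes the library build */

/* Hypothetical setup: skip ahead to a run of four 'z' bytes. */
static const u8 *skip_with_mlverm(const u8 *c, const u8 *c_end) {
    union AccelAux aux;
    memset(&aux, 0, sizeof(aux));
    aux.mverm.accel_type = ACCEL_MLVERM;
    aux.mverm.offset = 0;
    aux.mverm.c = 'z';   /* must be the uppercase byte for _NOCASE types */
    aux.mverm.len = 4;
    /* run_accel() returns c unchanged when fewer than 16 bytes remain */
    return run_accel(&aux, c, c_end);
}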
*/ @@ -81,6 +94,19 @@ union AccelAux { u8 c1; // uppercase if nocase u8 c2; // uppercase if nocase } dverm; + struct { + u8 accel_type; + u8 offset; + u8 c; // uppercase if nocase + u8 len; + } mverm; + struct { + u8 accel_type; + u8 offset; + u8 c; // uppercase if nocase + u8 len1; + u8 len2; + } mdverm; struct { u8 accel_type; u8 offset; diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 40c9c653f..19116a8f1 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -86,6 +86,30 @@ const char *accelName(u8 accel_type) { return "truffle"; case ACCEL_RED_TAPE: return "red tape"; + case ACCEL_MLVERM: + return "multibyte long vermicelli"; + case ACCEL_MLVERM_NOCASE: + return "multibyte long vermicelli nocase"; + case ACCEL_MLGVERM: + return "multibyte long-grab vermicelli"; + case ACCEL_MLGVERM_NOCASE: + return "multibyte long-grab vermicelli nocase"; + case ACCEL_MSVERM: + return "multibyte shift vermicelli"; + case ACCEL_MSVERM_NOCASE: + return "multibyte shift vermicelli nocase"; + case ACCEL_MSGVERM: + return "multibyte shift-grab vermicelli"; + case ACCEL_MSGVERM_NOCASE: + return "multibyte shift-grab vermicelli nocase"; + case ACCEL_MDSVERM: + return "multibyte doubleshift vermicelli"; + case ACCEL_MDSVERM_NOCASE: + return "multibyte doubleshift vermicelli nocase"; + case ACCEL_MDSGVERM: + return "multibyte doubleshift-grab vermicelli"; + case ACCEL_MDSGVERM_NOCASE: + return "multibyte doubleshift-grab vermicelli nocase"; default: return "unknown!"; } @@ -143,6 +167,23 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { describeClass(cr).c_str()); break; } + case ACCEL_MLVERM: + case ACCEL_MLVERM_NOCASE: + case ACCEL_MLGVERM: + case ACCEL_MLGVERM_NOCASE: + case ACCEL_MSVERM: + case ACCEL_MSVERM_NOCASE: + case ACCEL_MSGVERM: + case ACCEL_MSGVERM_NOCASE: + fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len); + break; + case ACCEL_MDSVERM: + case ACCEL_MDSVERM_NOCASE: + case ACCEL_MDSGVERM: + case ACCEL_MDSGVERM_NOCASE: + fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1, + accel.mdverm.len2); + break; default: fprintf(f, "\n"); break; diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index 1aa1b30fa..b04792b21 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -38,6 +38,7 @@ #include "nfa_internal.h" #include "shufti.h" #include "truffle.h" +#include "multivermicelli.h" #include "ue2common.h" #include "vermicelli.h" #include "util/bitutils.h" @@ -78,6 +79,66 @@ const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) { ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end); break; + case ACCEL_MLVERM: + DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c); + offset = aux->mverm.offset; + ptr = long_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); + break; + case ACCEL_MLVERM_NOCASE: + DEBUG_PRINTF("long vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); + offset = aux->mverm.offset; + ptr = long_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); + break; + case ACCEL_MLGVERM: + DEBUG_PRINTF("long grab vermicelli for 0x%02hhx\n", aux->mverm.c); + offset = aux->mverm.offset; + ptr = longgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); + break; + case ACCEL_MLGVERM_NOCASE: + DEBUG_PRINTF("long grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); + offset = aux->mverm.offset; + ptr = longgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); + break; + case ACCEL_MSVERM: + DEBUG_PRINTF("shift vermicelli for 
0x%02hhx\n", aux->mverm.c); + offset = aux->mverm.offset; + ptr = shift_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); + break; + case ACCEL_MSVERM_NOCASE: + DEBUG_PRINTF("shift vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); + offset = aux->mverm.offset; + ptr = shift_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); + break; + case ACCEL_MSGVERM: + DEBUG_PRINTF("shift grab vermicelli for 0x%02hhx\n", aux->mverm.c); + offset = aux->mverm.offset; + ptr = shiftgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); + break; + case ACCEL_MSGVERM_NOCASE: + DEBUG_PRINTF("shift grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); + offset = aux->mverm.offset; + ptr = shiftgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); + break; + case ACCEL_MDSVERM: + DEBUG_PRINTF("double shift vermicelli for 0x%02hhx\n", aux->mdverm.c); + offset = aux->mdverm.offset; + ptr = doubleshift_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2); + break; + case ACCEL_MDSVERM_NOCASE: + DEBUG_PRINTF("double shift vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c); + offset = aux->mverm.offset; + ptr = doubleshift_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2); + break; + case ACCEL_MDSGVERM: + DEBUG_PRINTF("double shift grab vermicelli for 0x%02hhx\n", aux->mdverm.c); + offset = aux->mverm.offset; + ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2); + break; + case ACCEL_MDSGVERM_NOCASE: + DEBUG_PRINTF("double shift grab vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c); + offset = aux->mverm.offset; + ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2); + break; case ACCEL_SHUFTI: DEBUG_PRINTF("single shufti\n"); offset = aux->shufti.offset; diff --git a/src/nfa/multivermicelli.c b/src/nfa/multivermicelli.c new file mode 100644 index 000000000..ab6d2cf21 --- /dev/null +++ b/src/nfa/multivermicelli.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ue2common.h" + +#include "multivermicelli.h" + +#include "multiaccel_common.h" + +#if !defined(__AVX2__) + +#define MATCH_ALGO long_ +#include "multiaccel_long.h" +#include "multivermicelli_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO longgrab_ +#include "multiaccel_longgrab.h" +#include "multivermicelli_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shift_ +#include "multiaccel_shift.h" +#include "multivermicelli_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shiftgrab_ +#include "multiaccel_shiftgrab.h" +#include "multivermicelli_sse.h" +#undef MATCH_ALGO + +#define MULTIACCEL_DOUBLE + +#define MATCH_ALGO doubleshift_ +#include "multiaccel_doubleshift.h" +#include "multivermicelli_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO doubleshiftgrab_ +#include "multiaccel_doubleshiftgrab.h" +#include "multivermicelli_sse.h" +#undef MATCH_ALGO + +#undef MULTIACCEL_DOUBLE + +#else + +#define MATCH_ALGO long_ +#include "multiaccel_long.h" +#include "multivermicelli_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO longgrab_ +#include "multiaccel_longgrab.h" +#include "multivermicelli_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shift_ +#include "multiaccel_shift.h" +#include "multivermicelli_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shiftgrab_ +#include "multiaccel_shiftgrab.h" +#include "multivermicelli_avx2.h" +#undef MATCH_ALGO + +#define MULTIACCEL_DOUBLE + +#define MATCH_ALGO doubleshift_ +#include "multiaccel_doubleshift.h" +#include "multivermicelli_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO doubleshiftgrab_ +#include "multiaccel_doubleshiftgrab.h" +#include "multivermicelli_avx2.h" +#undef MATCH_ALGO + +#undef MULTIACCEL_DOUBLE + +#endif diff --git a/src/nfa/multivermicelli.h b/src/nfa/multivermicelli.h new file mode 100644 index 000000000..55f9b1f28 --- /dev/null +++ b/src/nfa/multivermicelli.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
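The #define MATCH_ALGO / #include / #undef rounds in multivermicelli.c above are the core trick of this patch: the multiaccel_*.h and multivermicelli_*.h headers act as textual templates, and each round stamps out a fresh family of functions whose names are pasted together by the two-level JOIN/JOIN4 macros added to util/join.h (the extra indirection forces MATCH_ALGO to expand before ## pastes the tokens). A self-contained sketch of the pattern, with the template inlined as a macro instead of a real #include:

#include <stdio.h>

#define JOIN(x, y) JOIN_AGAIN(x, y)
#define JOIN_AGAIN(x, y) x ## y

/* Stand-in for the template headers: each "inclusion" stamps out the
 * same body under a new MATCH_ALGO-prefixed name. Inlined here rather
 * than via #include only to keep the sketch self-contained. */
#define DEFINE_EXEC(delta) \
    static int JOIN(MATCH_ALGO, exec)(int x) { return x + (delta); }

#define MATCH_ALGO long_
DEFINE_EXEC(1)
#undef MATCH_ALGO

#define MATCH_ALGO shiftgrab_
DEFINE_EXEC(2)
#undef MATCH_ALGO

int main(void) {
    printf("%d %d\n", long_exec(0), shiftgrab_exec(0));  /* 1 2 */
    return 0;
}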
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MULTIVERMICELLI_H_ +#define MULTIVERMICELLI_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +const u8 *long_vermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *longgrab_vermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *shift_vermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *shiftgrab_vermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *doubleshift_vermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end, const u8 run_len, + const u8 run2_len); + +const u8 *doubleshiftgrab_vermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end, const u8 run_len, + const u8 run2_len); + +#ifdef __cplusplus +} +#endif + + +#endif /* MULTIVERMICELLI_H_ */ diff --git a/src/nfa/multivermicelli_avx2.h b/src/nfa/multivermicelli_avx2.h new file mode 100644 index 000000000..9081aa3fc --- /dev/null +++ b/src/nfa/multivermicelli_avx2.h @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
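The extern "C" prototypes in multivermicelli.h above are the whole public surface of the multibyte vermicelli matchers. A usage sketch, assuming the in-tree headers and the convention visible in the implementations below, where a return of buf_end means nothing was found and any other pointer is a candidate position for the caller to verify:

#include "nfa/multivermicelli.h"   /* sketch assumes the in-tree build */

/* Hypothetical helper: look for a run of three 'a' bytes. */
static const u8 *find_triple_a(const u8 *buf, const u8 *buf_end) {
    const u8 *loc = long_vermicelliExec('a', 0 /* case-sensitive */,
                                        buf, buf_end, 3);
    return loc == buf_end ? NULL : loc;
}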
+ */ + +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/unaligned.h" + +#include "multiaccel_common.h" + +static really_inline +const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m256 chars, + const u8 *buf, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + m256 casemask = set32x8(CASE_CLEAR); + const u8 *ptr; + m256 data = loadu256(buf); + u32 z = movemask256(eq256(chars, and256(casemask, data))); + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (buf, z +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + return NULL; +} + +static really_inline +const u8 *JOIN(MATCH_ALGO, vermUnalign)(m256 chars, + const u8 *buf, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8 *ptr; + + m256 data = loadu256(buf); + u32 z = movemask256(eq256(chars, data)); + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (buf, z +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + return NULL; +} + +/* + * 32-byte pipeline + */ +static really_inline +const u8 *JOIN(MATCH_ALGO, vermPipeline)(m256 chars, + const u8 *buf, + const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8* ptr, *last_buf; + u32 last_res; + + // pipeline prologue: scan first 32 bytes + m256 data = load256(buf); + u32 z = movemask256(eq256(chars, data)); + last_res = z; + last_buf = buf; + buf += 32; + + // now, start the pipeline! + assert((size_t)buf % 32 == 0); + for (; buf + 31 < buf_end; buf += 32) { + // scan more data + data = load256(buf); + z = movemask256(eq256(chars, data)); + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_buf = buf; + last_res = z; + } + assert(buf <= buf_end && buf >= buf_end - 32); + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + return NULL; +} + +/* + * 32-byte caseless pipeline + */ +static really_inline +const u8 *JOIN(MATCH_ALGO, vermPipelineNocase)(m256 chars, + const u8 *buf, + const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + m256 casemask = set32x8(CASE_CLEAR); + const u8* ptr, *last_buf; + u32 last_res; + + // pipeline prologue: scan first 32 bytes + m256 data = load256(buf); + u32 z = movemask256(eq256(chars, and256(casemask, data))); + last_res = z; + last_buf = buf; + buf += 32; + + + // now, start the pipeline! 
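+    /* Two-stage software pipeline, as in vermPipeline() above: each
+     * iteration issues the load/compare for the current 32-byte block
+     * while resolving the previous block's movemask, so the cost of the
+     * match-function call is overlapped with the next SIMD load. */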
+ assert((size_t)buf % 32 == 0); + for (; buf + 31 < buf_end; buf += 32) { + // scan more data + data = load256(buf); + z = movemask256(eq256(chars, and256(casemask, data))); + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_buf = buf; + last_res = z; + } + assert(buf <= buf_end && buf >= buf_end - 32); + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + return NULL; +} + +const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase, + const u8 *buf, + const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", + nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); + assert(buf < buf_end); + + const u8 *ptr; + + // Handle small scans. + if (buf_end - buf < 32) { + for (; buf < buf_end; buf++) { + char cur = (char)*buf; + if (nocase) { + cur &= CASE_CLEAR; + } + if (cur == c) { + break; + } + } + return buf; + } + + m256 chars = set32x8(c); /* nocase already uppercase */ + + uintptr_t min = (uintptr_t)buf % 32; + + if (min) { + ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, + buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ) : JOIN(MATCH_ALGO, vermUnalign)(chars, + buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + buf += 32 - min; + } + + if (buf_end - buf >= 32){ + ptr = nocase ? JOIN(MATCH_ALGO, vermPipelineNocase)(chars, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ) : JOIN(MATCH_ALGO, vermPipeline)(chars, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + } + + // final unaligned scan + ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, + buf_end - 32, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ) : JOIN(MATCH_ALGO, vermUnalign)(chars, + buf_end - 32, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + + // run our pipeline + return ptr ? ptr : buf_end; +} diff --git a/src/nfa/multivermicelli_sse.h b/src/nfa/multivermicelli_sse.h new file mode 100644 index 000000000..cdacd2c43 --- /dev/null +++ b/src/nfa/multivermicelli_sse.h @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
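The nocase path in vermicelliExec() above clears each byte's ASCII case bit before comparing, which is why the code insists the target byte is already uppercase. Assuming CASE_CLEAR is the usual ASCII case mask ~0x20, i.e. 0xdf (an assumption here, not quoted from the tree), a two-line demonstration:

#include <stdio.h>

#define CASE_CLEAR 0xdf   /* assumed value: clears the ASCII case bit 0x20 */

int main(void) {
    /* both cases of a letter collapse to the uppercase code point */
    printf("%c %c\n", 'a' & CASE_CLEAR, 'A' & CASE_CLEAR);   /* A A */
    return 0;
}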
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/unaligned.h" + +#define VERM_BOUNDARY 16 +#define VERM_TYPE m128 +#define VERM_SET_FN set16x8 + +#include "multiaccel_common.h" + +static really_inline +const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m128 chars, + const u8 *buf, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + m128 casemask = set16x8(CASE_CLEAR); + const u8 *ptr; + m128 data = loadu128(buf); + u32 z = movemask128(eq128(chars, and128(casemask, data))); + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (buf, z +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + return NULL; +} + +static really_inline +const u8 *JOIN(MATCH_ALGO, vermUnalign)(m128 chars, + const u8 *buf, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8 *ptr; + + m128 data = loadu128(buf); + u32 z = movemask128(eq128(chars, data)); + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (buf, z +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + return NULL; +} + +/* + * 16-byte pipeline, for smaller scans + */ +static +const u8 *JOIN(MATCH_ALGO, vermPipeline16)(m128 chars, + const u8 *buf, + const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8* ptr, *last_buf; + u32 last_res; + + // pipeline prologue: scan first 16 bytes + m128 data = load128(buf); + u32 z = movemask128(eq128(chars, data)); + last_buf = buf; + last_res = z; + buf += 16; + + // now, start the pipeline! + assert((size_t)buf % 16 == 0); + for (; buf + 15 < buf_end; buf += 16) { + // scan more data + data = load128(buf); + z = movemask128(eq128(chars, data)); + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_buf = buf; + last_res = z; + } + assert(buf <= buf_end && buf >= buf_end - 16); + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + return NULL; +} + +/* + * 16-byte pipeline, for smaller scans + */ +static +const u8 *JOIN(MATCH_ALGO, vermPipeline16Nocase)(m128 chars, + const u8 *buf, + const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + m128 casemask = set16x8(CASE_CLEAR); + const u8* ptr, *last_buf; + u32 last_res; + + // pipeline prologue: scan first 16 bytes + m128 data = load128(buf); + u32 z = movemask128(eq128(chars, and128(casemask, data))); + last_buf = buf; + last_res = z; + buf += 16; + + // now, start the pipeline! 
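+    /* Caseless variant of vermPipeline16(): each block is folded through
+     * the case mask before the compare, so chars must already hold the
+     * uppercase byte (see the "nocase already uppercase" note in
+     * vermicelliExec() below). */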
+ assert((size_t)buf % 16 == 0); + for (; buf + 15 < buf_end; buf += 16) { + // scan more data + data = load128(buf); + z = movemask128(eq128(chars, and128(casemask, data))); + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_buf = buf; + last_res = z; + } + assert(buf <= buf_end && buf >= buf_end - 16); + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + return NULL; +} + +/* + * 32-byte pipeline, for bigger scans + */ +static +const u8 *JOIN(MATCH_ALGO, vermPipeline32)(m128 chars, + const u8 *buf, + const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8* ptr, *last_buf; + u32 res; + + // pipeline prologue: scan first 32 bytes + m128 data1 = load128(buf); + u32 z1 = movemask128(eq128(chars, data1)); + m128 data2 = load128(buf + 16); + u32 z2 = movemask128(eq128(chars, data2)); + + // store the results + u32 last_res = z1 | (z2 << VERM_BOUNDARY); + last_buf = buf; + buf += 32; + + + // now, start the pipeline! + assert((size_t)buf % 16 == 0); + for (; buf + 31 < buf_end; buf += 32) { + // scan more data + data1 = load128(buf); + z1 = movemask128(eq128(chars, data1)); + data2 = load128(buf + 16); + z2 = movemask128(eq128(chars, data2)); + res = z1 | (z2 << 16); + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_res = res; + last_buf = buf; + } + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + // if we still have some data left, scan it too + if (buf + 15 < buf_end) { + return JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + } + assert(buf <= buf_end && buf >= buf_end - 16); + + return NULL; +} + +/* + * 32-byte caseless pipeline, for bigger scans + */ +static +const u8 *JOIN(MATCH_ALGO, vermPipeline32Nocase)(m128 chars, + const u8 *buf, + const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + m128 casemask = set16x8(CASE_CLEAR); + const u8* ptr, *last_buf; + u32 last_res; + + // pipeline prologue: scan first 32 bytes + m128 data1 = load128(buf); + u32 z1 = movemask128(eq128(chars, and128(casemask, data1))); + m128 data2 = load128(buf + 16); + u32 z2 = movemask128(eq128(chars, and128(casemask, data2))); + u32 z = z1 | (z2 << VERM_BOUNDARY); + + last_res = z; + last_buf = buf; + buf += 32; + + // now, start the pipeline! 
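+    /* As in vermPipeline32() above: the two 16-byte movemasks are packed
+     * into one 32-bit word (z1 | z2 << 16) so a single call into the
+     * 64-bit match functions resolves a full 32-byte block. */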
+ assert((size_t)buf % 16 == 0); + for (; buf + 31 < buf_end; buf += 32) { + // scan more data + data1 = load128(buf); + z1 = movemask128(eq128(chars, and128(casemask, data1))); + data2 = load128(buf + 16); + z2 = movemask128(eq128(chars, and128(casemask, data2))); + z = z1 | (z2 << 16); + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_res = z; + last_buf = buf; + } + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + // if we still have some data left, scan it too + if (buf + 15 < buf_end) { + return JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + } + assert(buf <= buf_end && buf >= buf_end - 16); + + return NULL; +} + +const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase, + const u8 *buf, + const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", + nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); + assert(buf < buf_end); + + const u8 *ptr; + + // Handle small scans. + if (buf_end - buf < VERM_BOUNDARY) { + for (; buf < buf_end; buf++) { + char cur = (char)*buf; + if (nocase) { + cur &= CASE_CLEAR; + } + if (cur == c) { + break; + } + } + return buf; + } + + VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ + + uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; + + if (min) { + ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, + buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ) : JOIN(MATCH_ALGO, vermUnalign)(chars, + buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + buf += VERM_BOUNDARY - min; + } + + // if we have enough data, run bigger pipeline; otherwise run smaller one + if (buf_end - buf >= 128) { + ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline32Nocase)(chars, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ) : JOIN(MATCH_ALGO, vermPipeline32)(chars, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + } else if (buf_end - buf >= 16){ + ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ) : JOIN(MATCH_ALGO, vermPipeline16)(chars, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + } + + // final unaligned scan + ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, + buf_end - VERM_BOUNDARY, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ) : JOIN(MATCH_ALGO, vermUnalign)(chars, + buf_end - VERM_BOUNDARY, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + + // run our pipeline + return ptr ? 
ptr : buf_end; +} From 47b17ade27ffef5e169bdd2da4fe41803606f814 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Wed, 9 Dec 2015 12:20:34 +0000 Subject: [PATCH 026/218] Multibyte shufti runtime --- CMakeLists.txt | 5 + src/nfa/accel.c | 55 ++++++++ src/nfa/accel.h | 22 +++ src/nfa/accel_dump.cpp | 62 ++++++--- src/nfa/limex_accel.c | 27 ++++ src/nfa/multishufti.c | 114 ++++++++++++++++ src/nfa/multishufti.h | 70 ++++++++++ src/nfa/multishufti_avx2.h | 122 +++++++++++++++++ src/nfa/multishufti_sse.h | 266 +++++++++++++++++++++++++++++++++++++ src/nfa/shufti.c | 108 ++------------- src/nfa/shufti_common.h | 146 ++++++++++++++++++++ 11 files changed, 886 insertions(+), 111 deletions(-) create mode 100644 src/nfa/multishufti.c create mode 100644 src/nfa/multishufti.h create mode 100644 src/nfa/multishufti_avx2.h create mode 100644 src/nfa/multishufti_sse.h create mode 100644 src/nfa/shufti_common.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0848f5501..353bc5611 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -444,6 +444,10 @@ set (hs_exec_SRCS src/nfa/multiaccel_longgrab.h src/nfa/multiaccel_shift.h src/nfa/multiaccel_shiftgrab.h + src/nfa/multishufti.c + src/nfa/multishufti_avx2.h + src/nfa/multishufti_sse.h + src/nfa/multishufti.h src/nfa/multivermicelli.c src/nfa/multivermicelli.h src/nfa/multivermicelli_sse.h @@ -455,6 +459,7 @@ set (hs_exec_SRCS src/nfa/repeat.c src/nfa/repeat.h src/nfa/repeat_internal.h + src/nfa/shufti_common.h src/nfa/shufti.c src/nfa/shufti.h src/nfa/truffle.c diff --git a/src/nfa/accel.c b/src/nfa/accel.c index 43ecd84f0..ee081154d 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -30,6 +30,7 @@ #include "shufti.h" #include "truffle.h" #include "vermicelli.h" +#include "multishufti.h" #include "multivermicelli.h" #include "ue2common.h" @@ -219,6 +220,60 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end, accel->mdverm.len1, accel->mdverm.len2); break; + case ACCEL_MLSHUFTI: + DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, + accel->mshufti.len); + break; + case ACCEL_MLGSHUFTI: + DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, + accel->mshufti.len); + break; + case ACCEL_MSSHUFTI: + DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, + accel->mshufti.len); + break; + case ACCEL_MSGSHUFTI: + DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, + accel->mshufti.len); + break; + case ACCEL_MDSSHUFTI: + DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, + accel->mdshufti.len1, accel->mdshufti.len2); + break; + case ACCEL_MDSGSHUFTI: + DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, + accel->mdshufti.len1, accel->mdshufti.len2); + break; default: assert(!"not here"); diff --git a/src/nfa/accel.h b/src/nfa/accel.h index cc64d5870..87acf6cf9 100644 --- 
a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -74,6 +74,13 @@ enum AccelType { ACCEL_MDSVERM_NOCASE, ACCEL_MDSGVERM, ACCEL_MDSGVERM_NOCASE, + /* multibyte shuftis */ + ACCEL_MLSHUFTI, + ACCEL_MLGSHUFTI, + ACCEL_MSSHUFTI, + ACCEL_MSGSHUFTI, + ACCEL_MDSSHUFTI, + ACCEL_MDSGSHUFTI, }; /** \brief Structure for accel framework. */ @@ -121,6 +128,21 @@ union AccelAux { m128 lo2; m128 hi2; } dshufti; + struct { + u8 accel_type; + u8 offset; + m128 lo; + m128 hi; + u8 len; + } mshufti; + struct { + u8 accel_type; + u8 offset; + m128 lo; + m128 hi; + u8 len1; + u8 len2; + } mdshufti; struct { u8 accel_type; u8 offset; diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 19116a8f1..5a28c6a08 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -110,11 +110,38 @@ const char *accelName(u8 accel_type) { return "multibyte doubleshift-grab vermicelli"; case ACCEL_MDSGVERM_NOCASE: return "multibyte doubleshift-grab vermicelli nocase"; + case ACCEL_MLSHUFTI: + return "multibyte long shufti"; + case ACCEL_MLGSHUFTI: + return "multibyte long-grab shufti"; + case ACCEL_MSSHUFTI: + return "multibyte shift shufti"; + case ACCEL_MSGSHUFTI: + return "multibyte shift-grab shufti"; + case ACCEL_MDSSHUFTI: + return "multibyte doubleshift shufti"; + case ACCEL_MDSGSHUFTI: + return "multibyte doubleshift-grab shufti"; default: return "unknown!"; } } +static +void dumpShuftiCharReach(FILE *f, const m128 &lo, const m128 &hi) { + CharReach cr = shufti2cr(lo, hi); + fprintf(f, "count %zu class %s\n", cr.count(), + describeClass(cr).c_str()); +} + +static +void dumpShuftiMasks(FILE *f, const m128 &lo, const m128 &hi) { + fprintf(f, "lo %s\n", + dumpMask((const u8 *)&lo, 128).c_str()); + fprintf(f, "hi %s\n", + dumpMask((const u8 *)&hi, 128).c_str()); +} + void dumpAccelInfo(FILE *f, const AccelAux &accel) { fprintf(f, " %s", accelName(accel.accel_type)); if (accel.generic.offset) { @@ -136,25 +163,16 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { break; case ACCEL_SHUFTI: { fprintf(f, "\n"); - fprintf(f, "lo %s\n", - dumpMask((const u8 *)&accel.shufti.lo, 128).c_str()); - fprintf(f, "hi %s\n", - dumpMask((const u8 *)&accel.shufti.hi, 128).c_str()); - CharReach cr = shufti2cr(accel.shufti.lo, accel.shufti.hi); - fprintf(f, "count %zu class %s\n", cr.count(), - describeClass(cr).c_str()); + dumpShuftiMasks(f, accel.shufti.lo, accel.shufti.hi); + dumpShuftiCharReach(f, accel.shufti.lo, accel.shufti.hi); break; } case ACCEL_DSHUFTI: fprintf(f, "\n"); - fprintf(f, "lo1 %s\n", - dumpMask((const u8 *)&accel.dshufti.lo1, 128).c_str()); - fprintf(f, "hi1 %s\n", - dumpMask((const u8 *)&accel.dshufti.hi1, 128).c_str()); - fprintf(f, "lo2 %s\n", - dumpMask((const u8 *)&accel.dshufti.lo2, 128).c_str()); - fprintf(f, "hi2 %s\n", - dumpMask((const u8 *)&accel.dshufti.hi2, 128).c_str()); + fprintf(f, "mask 1\n"); + dumpShuftiMasks(f, accel.dshufti.lo1, accel.dshufti.hi1); + fprintf(f, "mask 2\n"); + dumpShuftiMasks(f, accel.dshufti.lo2, accel.dshufti.hi2); break; case ACCEL_TRUFFLE: { fprintf(f, "\n"); @@ -184,6 +202,20 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1, accel.mdverm.len2); break; + case ACCEL_MLSHUFTI: + case ACCEL_MLGSHUFTI: + case ACCEL_MSSHUFTI: + case ACCEL_MSGSHUFTI: + fprintf(f, " len:%u\n", accel.mshufti.len); + dumpShuftiMasks(f, accel.mshufti.lo, accel.mshufti.hi); + dumpShuftiCharReach(f, accel.mshufti.lo, accel.mshufti.hi); + break; + case ACCEL_MDSSHUFTI: + case ACCEL_MDSGSHUFTI: + fprintf(f, 
" len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2); + dumpShuftiMasks(f, accel.mdshufti.lo, accel.mdshufti.hi); + dumpShuftiCharReach(f, accel.mdshufti.lo, accel.mdshufti.hi); + break; default: fprintf(f, "\n"); break; diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index b04792b21..c12f917ac 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -38,6 +38,7 @@ #include "nfa_internal.h" #include "shufti.h" #include "truffle.h" +#include "multishufti.h" #include "multivermicelli.h" #include "ue2common.h" #include "vermicelli.h" @@ -150,6 +151,32 @@ const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) { ptr = shuftiDoubleExec(aux->dshufti.lo1, aux->dshufti.hi1, aux->dshufti.lo2, aux->dshufti.hi2, ptr, end); break; + case ACCEL_MLSHUFTI: + offset = aux->mshufti.offset; + ptr = long_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); + break; + case ACCEL_MLGSHUFTI: + offset = aux->mshufti.offset; + ptr = longgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); + break; + case ACCEL_MSSHUFTI: + offset = aux->mshufti.offset; + ptr = shift_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); + break; + case ACCEL_MSGSHUFTI: + offset = aux->mshufti.offset; + ptr = shiftgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); + break; + case ACCEL_MDSSHUFTI: + offset = aux->mdshufti.offset; + ptr = doubleshift_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi, ptr, end, + aux->mdshufti.len1, aux->mdshufti.len2); + break; + case ACCEL_MDSGSHUFTI: + offset = aux->mdshufti.offset; + ptr = doubleshiftgrab_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi, ptr, end, + aux->mdshufti.len1, aux->mdshufti.len2); + break; case ACCEL_TRUFFLE: DEBUG_PRINTF("truffle shuffle\n"); offset = aux->truffle.offset; diff --git a/src/nfa/multishufti.c b/src/nfa/multishufti.c new file mode 100644 index 000000000..cb85b7186 --- /dev/null +++ b/src/nfa/multishufti.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shufti: character class acceleration. + * + * Utilises the SSSE3 pshufb shuffle instruction + */ + +#include "config.h" +#include "ue2common.h" + +#include "multishufti.h" + +#include "multiaccel_common.h" + +#if !defined(__AVX2__) + +#define MATCH_ALGO long_ +#include "multiaccel_long.h" +#include "multishufti_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO longgrab_ +#include "multiaccel_longgrab.h" +#include "multishufti_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shift_ +#include "multiaccel_shift.h" +#include "multishufti_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shiftgrab_ +#include "multiaccel_shiftgrab.h" +#include "multishufti_sse.h" +#undef MATCH_ALGO + +#define MULTIACCEL_DOUBLE + +#define MATCH_ALGO doubleshift_ +#include "multiaccel_doubleshift.h" +#include "multishufti_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO doubleshiftgrab_ +#include "multiaccel_doubleshiftgrab.h" +#include "multishufti_sse.h" +#undef MATCH_ALGO + +#undef MULTIACCEL_DOUBLE + +#else + +#define MATCH_ALGO long_ +#include "multiaccel_long.h" +#include "multishufti_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO longgrab_ +#include "multiaccel_longgrab.h" +#include "multishufti_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shift_ +#include "multiaccel_shift.h" +#include "multishufti_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shiftgrab_ +#include "multiaccel_shiftgrab.h" +#include "multishufti_avx2.h" +#undef MATCH_ALGO + +#define MULTIACCEL_DOUBLE + +#define MATCH_ALGO doubleshift_ +#include "multiaccel_doubleshift.h" +#include "multishufti_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO doubleshiftgrab_ +#include "multiaccel_doubleshiftgrab.h" +#include "multishufti_avx2.h" +#undef MATCH_ALGO + +#undef MULTIACCEL_DOUBLE + +#endif diff --git a/src/nfa/multishufti.h b/src/nfa/multishufti.h new file mode 100644 index 000000000..bcccf607c --- /dev/null +++ b/src/nfa/multishufti.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Multishufti: multibyte version of Shufti + * + * Utilises the SSSE3 pshufb shuffle instruction + */ + +#ifndef MULTISHUFTI_H +#define MULTISHUFTI_H + +#include "ue2common.h" +#include "util/simd_utils.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +const u8 *long_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *longgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *shift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *shiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *doubleshift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const u8 run_len, + const u8 run2_len); + +const u8 *doubleshiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const u8 run_len, + const u8 run2_len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/nfa/multishufti_avx2.h b/src/nfa/multishufti_avx2.h new file mode 100644 index 000000000..e9980872d --- /dev/null +++ b/src/nfa/multishufti_avx2.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "shufti_common.h" + +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/simd_utils_ssse3.h" + +static really_inline +const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars, + const u8 *buf, const m256 low4bits, + const m256 zeroes, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, ~z +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); +} + +const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi, + const u8 *buf, + const u8 *buf_end, u8 run_len +#ifdef MULTIACCEL_DOUBLE + , u8 run_len2 +#endif + ) { + assert(buf && buf_end); + assert(buf < buf_end); + + // Slow path for small cases. + if (buf_end - buf < 32) { + return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, + buf, buf_end); + } + + const m256 zeroes = zeroes256(); + const m256 low4bits = set32x8(0xf); + const m256 wide_mask_lo = set2x128(mask_lo); + const m256 wide_mask_hi = set2x128(mask_hi); + const u8 *rv; + + size_t min = (size_t)buf % 32; + assert(buf_end - buf >= 32); + + // Preconditioning: most of the time our buffer won't be aligned. + m256 chars = loadu256(buf); + rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf, + low4bits, zeroes, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + buf += (32 - min); + + // Unrolling was here, but it wasn't doing anything but taking up space. + // Reroll FTW. + const u8 *last_block = buf_end - 32; + while (buf < last_block) { + m256 lchars = load256(buf); + rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, lchars, buf, + low4bits, zeroes, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + buf += 32; + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf_end. + assert(buf <= buf_end && buf >= buf_end - 32); + chars = loadu256(buf_end - 32); + rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, + low4bits, zeroes, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + + return buf_end; +} diff --git a/src/nfa/multishufti_sse.h b/src/nfa/multishufti_sse.h new file mode 100644 index 000000000..7ea5946d2 --- /dev/null +++ b/src/nfa/multishufti_sse.h @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "shufti_common.h" + +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/simd_utils_ssse3.h" + +/* Normal SSSE3 shufti */ + +static really_inline +const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 mask_lo, m128 mask_hi, m128 chars, + const u8 *buf, const m128 low4bits, + const m128 zeroes, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + // negate first 16 bits + u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes) ^ 0xFFFF; + return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); +} + +/* + * 16-byte pipeline, for smaller scans + */ +static +const u8 *JOIN(MATCH_ALGO, shuftiPipeline16)(m128 mask_lo, m128 mask_hi, + const u8 *buf, const u8 *buf_end, + const m128 low4bits, + const m128 zeroes, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8* ptr, *last_buf; + u32 last_res; + + // pipeline prologue: scan first 16 bytes + m128 data = load128(buf); + u32 z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF; + last_buf = buf; + last_res = z; + buf += 16; + + // now, start the pipeline! + assert((size_t)buf % 16 == 0); + for (; buf + 15 < buf_end; buf += 16) { + // scan more data + data = load128(buf); + z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF; + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_buf = buf; + last_res = z; + } + assert(buf <= buf_end && buf >= buf_end - 16); + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + return NULL; +} + +/* + * 32-byte pipeline, for bigger scans + */ +static +const u8 *JOIN(MATCH_ALGO, shuftiPipeline32)(m128 mask_lo, m128 mask_hi, + const u8 *buf, const u8 *buf_end, + const m128 low4bits, + const m128 zeroes, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8* ptr, *last_buf; + u32 res; + + // pipeline prologue: scan first 32 bytes + m128 data1 = load128(buf); + u32 z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF; + m128 data2 = load128(buf + 16); + u32 z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF; + + // store the results + u32 last_res = z1 | (z2 << 16); + last_buf = buf; + buf += 32; + + + // now, start the pipeline! 
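+    /* Same two-deep pipeline as the multibyte vermicelli scanners. Note
+     * the inversion above: block() sets a bit for each byte that does
+     * *not* match the class, so each 16-byte result is XORed with 0xFFFF
+     * before the pair is packed into the 32-bit mask handed to the
+     * multiaccel match functions. */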
+ assert((size_t)buf % 16 == 0); + for (; buf + 31 < buf_end; buf += 32) { + // scan more data + data1 = load128(buf); + z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF; + data2 = load128(buf + 16); + z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF; + res = z1 | (z2 << 16); + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_res = res; + last_buf = buf; + } + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + // if we still have some data left, scan it too + for (; buf + 15 < buf_end; buf += 16) { + m128 chars = load128(buf); + ptr = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf, + low4bits, zeroes, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + } + assert(buf <= buf_end && buf >= buf_end - 16); + + return NULL; +} + +const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi, + const u8 *buf, + const u8 *buf_end, u8 run_len +#ifdef MULTIACCEL_DOUBLE + , u8 run_len2 +#endif + ) { + assert(buf && buf_end); + assert(buf < buf_end); + + // Slow path for small cases. + if (buf_end - buf < 16) { + return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, + buf, buf_end); + } + + const m128 zeroes = zeroes128(); + const m128 low4bits = _mm_set1_epi8(0xf); + const u8 *rv; + + size_t min = (size_t)buf % 16; + assert(buf_end - buf >= 16); + + // Preconditioning: most of the time our buffer won't be aligned. + m128 chars = loadu128(buf); + rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf, + low4bits, zeroes, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + buf += (16 - min); + + // if we have enough data, run bigger pipeline; otherwise run smaller one + if (buf_end - buf >= 128) { + rv = JOIN(MATCH_ALGO, shuftiPipeline32)(mask_lo, mask_hi, + buf, buf_end, low4bits, zeroes, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(rv)) { + return rv; + } + } else if (buf_end - buf >= 16){ + rv = JOIN(MATCH_ALGO, shuftiPipeline16)(mask_lo, mask_hi, + buf, buf_end, low4bits, zeroes, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(rv)) { + return rv; + } + } + + // Use an unaligned load to mop up the last 16 bytes and get an accurate + // picture to buf_end. + chars = loadu128(buf_end - 16); + rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, + buf_end - 16, low4bits, zeroes, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + + return buf_end; +} diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index 7d50709c3..b1fec4887 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -38,20 +38,9 @@ #include "util/simd_utils.h" #include "util/unaligned.h" -/** \brief Naive byte-by-byte implementation. */ -static really_inline -const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, - const u8 *buf_end) { - assert(buf < buf_end); +#include "shufti_common.h" - for (; buf < buf_end; ++buf) { - u8 c = *buf; - if (lo[c & 0xf] & hi[c >> 4]) { - break; - } - } - return buf; -} +#include "util/simd_utils_ssse3.h" /** \brief Naive byte-by-byte implementation. 
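  * Walks the buffer in reverse; used as the fallback when the region is
  * too short for the SIMD reverse scan.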
*/ static really_inline @@ -68,54 +57,11 @@ const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, return buf_end; } -#ifdef DEBUG -#include - -#define DUMP_MSK(_t) \ -static UNUSED \ -void dumpMsk##_t(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - for (int j = 0; j < 8; j++) { \ - if ((c >> (7-j)) & 0x1) \ - printf("1"); \ - else \ - printf("0"); \ - } \ - printf(" "); \ - } \ -} \ -static UNUSED \ -void dumpMsk##_t##AsChars(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - if (isprint(c)) \ - printf("%c",c); \ - else \ - printf("."); \ - } \ -} - -DUMP_MSK(128) -#endif - -#include "util/simd_utils_ssse3.h" - #if !defined(__AVX2__) /* Normal SSSE3 shufti */ -#define GET_LO_4(chars) and128(chars, low4bits) -#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4) - static really_inline -const u8 *firstMatch(const u8 *buf, m128 t, m128 compare) { -#ifdef DEBUG - DEBUG_PRINTF("confirming match in:"); dumpMsk128(t); printf("\n"); -#endif - - u32 z = movemask128(eq128(t, compare)); +const u8 *firstMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffff)) { u32 pos = ctz32(~z & 0xffff); assert(pos < 16); @@ -128,19 +74,9 @@ const u8 *firstMatch(const u8 *buf, m128 t, m128 compare) { static really_inline const u8 *fwdBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, const m128 low4bits, const m128 zeroes) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); - m128 t = and128(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); -#endif + u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - return firstMatch(buf, t, zeroes); + return firstMatch(buf, z); } const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, @@ -307,7 +243,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, DEBUG_PRINTF(" t2: "); dumpMsk128(t2); printf("\n"); #endif - return firstMatch(buf, t2, ones); + u32 z = movemask128(eq128(t2, ones)); + return firstMatch(buf, z); } const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, @@ -356,20 +293,8 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, #else // AVX2 - 256 wide shuftis -#ifdef DEBUG -DUMP_MSK(256) -#endif - -#define GET_LO_4(chars) and256(chars, low4bits) -#define GET_HI_4(chars) rshift4x64(andnot256(low4bits, chars), 4) - static really_inline -const u8 *firstMatch(const u8 *buf, m256 t, m256 compare) { -#ifdef DEBUG - DEBUG_PRINTF("confirming match in:"); dumpMsk256(t); printf("\n"); -#endif - - u32 z = movemask256(eq256(t, compare)); +const u8 *firstMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffffffff)) { u32 pos = ctz32(~z); assert(pos < 32); @@ -382,19 +307,9 @@ const u8 *firstMatch(const u8 *buf, m256 t, m256 compare) { static really_inline const u8 *fwdBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, const m256 low4bits, const m256 zeroes) { - m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); - m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); - m256 t = and256(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk256(chars); 
printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); -#endif + u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - return firstMatch(buf, t, zeroes); + return firstMatch(buf, z); } /* takes 128 bit masks, but operates on 256 bits of data */ @@ -564,8 +479,9 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, DEBUG_PRINTF(" c2_hi: "); dumpMsk256(c2_hi); printf("\n"); DEBUG_PRINTF(" t2: "); dumpMsk256(t2); printf("\n"); #endif + u32 z = movemask256(eq256(t2, ones)); - return firstMatch(buf, t2, ones); + return firstMatch(buf, z); } /* takes 128 bit masks, but operates on 256 bits of data */ diff --git a/src/nfa/shufti_common.h b/src/nfa/shufti_common.h new file mode 100644 index 000000000..9c11f2b9e --- /dev/null +++ b/src/nfa/shufti_common.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHUFTI_COMMON_H_ +#define SHUFTI_COMMON_H_ + +#include "ue2common.h" + +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/unaligned.h" +#include "util/simd_utils_ssse3.h" + +/* + * Common stuff for all versions of shufti (single, multi and multidouble) + */ + +/** \brief Naive byte-by-byte implementation. 
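+ *
+ * Returns a pointer to the first byte whose nibbles select a set bit in
+ * both masks (lo[c & 0xf] & hi[c >> 4] != 0), or buf_end if nothing in
+ * the range matches. For example, for c = 0x41 ('A') the lookup is
+ * lo[0x1] & hi[0x4].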
*/ +static really_inline +const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, + const u8 *buf_end) { + assert(buf < buf_end); + + for (; buf < buf_end; ++buf) { + u8 c = *buf; + if (lo[c & 0xf] & hi[c >> 4]) { + break; + } + } + return buf; +} + +#ifdef DEBUG +#include + +#define DUMP_MSK(_t) \ +static UNUSED \ +void dumpMsk##_t(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + for (int j = 0; j < 8; j++) { \ + if ((c >> (7-j)) & 0x1) \ + printf("1"); \ + else \ + printf("0"); \ + } \ + printf(" "); \ + } \ +} \ +static UNUSED \ +void dumpMsk##_t##AsChars(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + if (isprint(c)) \ + printf("%c",c); \ + else \ + printf("."); \ + } \ +} + +#endif + +#if !defined(__AVX2__) + +#ifdef DEBUG +DUMP_MSK(128) +#endif + +#define GET_LO_4(chars) and128(chars, low4bits) +#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4) + +static really_inline +u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, + const m128 compare) { + m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); + m128 t = and128(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); +#endif + return movemask128(eq128(t, compare)); +} + +#else + +#ifdef DEBUG +DUMP_MSK(256) +#endif + +#define GET_LO_4(chars) and256(chars, low4bits) +#define GET_HI_4(chars) rshift4x64(andnot256(low4bits, chars), 4) + +static really_inline +u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, + const m256 compare) { + m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); + m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); + m256 t = and256(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); +#endif + + return movemask256(eq256(t, compare)); +} + +#endif + + +#endif /* SHUFTI_COMMON_H_ */ From 081b3ef36963ed10b1b88c5053e42bda4b85d658 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Wed, 9 Dec 2015 13:10:57 +0000 Subject: [PATCH 027/218] Multibyte truffle runtime --- CMakeLists.txt | 5 + src/nfa/accel.c | 60 ++++++++ src/nfa/accel.h | 22 +++ src/nfa/accel_dump.cpp | 51 ++++++- src/nfa/limex_accel.c | 41 ++++++ src/nfa/multitruffle.c | 111 +++++++++++++++ src/nfa/multitruffle.h | 73 ++++++++++ src/nfa/multitruffle_avx2.h | 125 +++++++++++++++++ src/nfa/multitruffle_sse.h | 265 ++++++++++++++++++++++++++++++++++++ 9 files changed, 746 insertions(+), 7 deletions(-) create mode 100644 src/nfa/multitruffle.c create mode 100644 src/nfa/multitruffle.h create mode 100644 src/nfa/multitruffle_avx2.h create mode 100644 src/nfa/multitruffle_sse.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 353bc5611..db123c1b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -448,6 +448,10 @@ set (hs_exec_SRCS src/nfa/multishufti_avx2.h src/nfa/multishufti_sse.h src/nfa/multishufti.h + src/nfa/multitruffle.c + src/nfa/multitruffle_avx2.h + 
src/nfa/multitruffle_sse.h + src/nfa/multitruffle.h src/nfa/multivermicelli.c src/nfa/multivermicelli.h src/nfa/multivermicelli_sse.h @@ -462,6 +466,7 @@ set (hs_exec_SRCS src/nfa/shufti_common.h src/nfa/shufti.c src/nfa/shufti.h + src/nfa/truffle_common.h src/nfa/truffle.c src/nfa/truffle.h src/nfa/vermicelli.h diff --git a/src/nfa/accel.c b/src/nfa/accel.c index ee081154d..a8fc4e36a 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -31,6 +31,7 @@ #include "truffle.h" #include "vermicelli.h" #include "multishufti.h" +#include "multitruffle.h" #include "multivermicelli.h" #include "ue2common.h" @@ -274,6 +275,65 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, accel->mdshufti.len1, accel->mdshufti.len2); break; + case ACCEL_MLTRUFFLE: + DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, + c, c_end, accel->mtruffle.len); + break; + case ACCEL_MLGTRUFFLE: + DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, + c, c_end, accel->mtruffle.len); + break; + case ACCEL_MSTRUFFLE: + DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, + c, c_end, accel->mtruffle.len); + break; + case ACCEL_MSGTRUFFLE: + DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, + c, c_end, accel->mtruffle.len); + break; + case ACCEL_MDSTRUFFLE: + DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshift_truffleExec(accel->mdtruffle.mask1, + accel->mdtruffle.mask2, c, c_end, + accel->mdtruffle.len1, + accel->mdtruffle.len2); + break; + case ACCEL_MDSGTRUFFLE: + DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1, + accel->mdtruffle.mask2, c, c_end, + accel->mdtruffle.len1, + accel->mdtruffle.len2); + break; + default: assert(!"not here"); diff --git a/src/nfa/accel.h b/src/nfa/accel.h index 87acf6cf9..af0295665 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -81,6 +81,13 @@ enum AccelType { ACCEL_MSGSHUFTI, ACCEL_MDSSHUFTI, ACCEL_MDSGSHUFTI, + /* multibyte truffles */ + ACCEL_MLTRUFFLE, + ACCEL_MLGTRUFFLE, + ACCEL_MSTRUFFLE, + ACCEL_MSGTRUFFLE, + ACCEL_MDSTRUFFLE, + ACCEL_MDSGTRUFFLE }; /** \brief Structure for accel framework. 
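 *
 * Each acceleration scheme reads its parameters from one member of this
 * union. Every member begins with accel_type and offset, so those two
 * fields can be read before the specific scheme is known.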
*/ @@ -149,6 +156,21 @@ union AccelAux { m128 mask1; m128 mask2; } truffle; + struct { + u8 accel_type; + u8 offset; + m128 mask1; + m128 mask2; + u8 len; + } mtruffle; + struct { + u8 accel_type; + u8 offset; + m128 mask1; + m128 mask2; + u8 len1; + u8 len2; + } mdtruffle; }; /** diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 5a28c6a08..2370718ad 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -122,6 +122,18 @@ const char *accelName(u8 accel_type) { return "multibyte doubleshift shufti"; case ACCEL_MDSGSHUFTI: return "multibyte doubleshift-grab shufti"; + case ACCEL_MLTRUFFLE: + return "multibyte long truffle"; + case ACCEL_MLGTRUFFLE: + return "multibyte long-grab truffle"; + case ACCEL_MSTRUFFLE: + return "multibyte shift truffle"; + case ACCEL_MSGTRUFFLE: + return "multibyte shift-grab truffle"; + case ACCEL_MDSTRUFFLE: + return "multibyte doubleshift truffle"; + case ACCEL_MDSGTRUFFLE: + return "multibyte doubleshift-grab truffle"; default: return "unknown!"; } @@ -142,6 +154,22 @@ void dumpShuftiMasks(FILE *f, const m128 &lo, const m128 &hi) { dumpMask((const u8 *)&hi, 128).c_str()); } +static +void dumpTruffleCharReach(FILE *f, const m128 &hiset, const m128 &hiclear) { + CharReach cr = truffle2cr(hiset, hiclear); + fprintf(f, "count %zu class %s\n", cr.count(), + describeClass(cr).c_str()); +} + +static +void dumpTruffleMasks(FILE *f, const m128 &hiset, const m128 &hiclear) { + fprintf(f, "lo %s\n", + dumpMask((const u8 *)&hiset, 128).c_str()); + fprintf(f, "hi %s\n", + dumpMask((const u8 *)&hiclear, 128).c_str()); +} + + void dumpAccelInfo(FILE *f, const AccelAux &accel) { fprintf(f, " %s", accelName(accel.accel_type)); if (accel.generic.offset) { @@ -176,13 +204,8 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { break; case ACCEL_TRUFFLE: { fprintf(f, "\n"); - fprintf(f, "lo %s\n", - dumpMask((const u8 *)&accel.truffle.mask1, 128).c_str()); - fprintf(f, "hi %s\n", - dumpMask((const u8 *)&accel.truffle.mask2, 128).c_str()); - CharReach cr = truffle2cr(accel.truffle.mask1, accel.truffle.mask2); - fprintf(f, "count %zu class %s\n", cr.count(), - describeClass(cr).c_str()); + dumpTruffleMasks(f, accel.truffle.mask1, accel.truffle.mask2); + dumpTruffleCharReach(f, accel.truffle.mask1, accel.truffle.mask2); break; } case ACCEL_MLVERM: @@ -216,6 +239,20 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { dumpShuftiMasks(f, accel.mdshufti.lo, accel.mdshufti.hi); dumpShuftiCharReach(f, accel.mdshufti.lo, accel.mdshufti.hi); break; + case ACCEL_MLTRUFFLE: + case ACCEL_MLGTRUFFLE: + case ACCEL_MSTRUFFLE: + case ACCEL_MSGTRUFFLE: + fprintf(f, " len:%u\n", accel.mtruffle.len); + dumpTruffleMasks(f, accel.mtruffle.mask1, accel.mtruffle.mask2); + dumpTruffleCharReach(f, accel.mtruffle.mask1, accel.mtruffle.mask2); + break; + case ACCEL_MDSTRUFFLE: + case ACCEL_MDSGTRUFFLE: + fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2); + dumpTruffleMasks(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2); + dumpTruffleCharReach(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2); + break; default: fprintf(f, "\n"); break; diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index c12f917ac..77ed5ac07 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -39,6 +39,7 @@ #include "shufti.h" #include "truffle.h" #include "multishufti.h" +#include "multitruffle.h" #include "multivermicelli.h" #include "ue2common.h" #include "vermicelli.h" @@ -182,6 +183,46 @@ const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, 
const u8 *end) { offset = aux->truffle.offset; ptr = truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end); break; + case ACCEL_MLTRUFFLE: + DEBUG_PRINTF("long match truffle shuffle\n"); + offset = aux->mtruffle.offset; + ptr = long_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, + ptr, end, aux->mtruffle.len); + break; + case ACCEL_MLGTRUFFLE: + DEBUG_PRINTF("long grab match truffle shuffle\n"); + offset = aux->mtruffle.offset; + ptr = longgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, + ptr, end, aux->mtruffle.len); + break; + case ACCEL_MSTRUFFLE: + DEBUG_PRINTF("shift match truffle shuffle\n"); + offset = aux->mtruffle.offset; + ptr = shift_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, + ptr, end, aux->mtruffle.len); + break; + case ACCEL_MSGTRUFFLE: + DEBUG_PRINTF("shift grab match truffle shuffle\n"); + offset = aux->mtruffle.offset; + ptr = shiftgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, + ptr, end, aux->mtruffle.len); + break; + case ACCEL_MDSTRUFFLE: + DEBUG_PRINTF("double shift match truffle shuffle\n"); + offset = aux->mdtruffle.offset; + ptr = doubleshift_truffleExec(aux->mdtruffle.mask1, + aux->mdtruffle.mask2, ptr, end, + aux->mdtruffle.len1, + aux->mdtruffle.len2); + break; + case ACCEL_MDSGTRUFFLE: + DEBUG_PRINTF("double shift grab match truffle shuffle\n"); + offset = aux->mdtruffle.offset; + ptr = doubleshiftgrab_truffleExec(aux->mdtruffle.mask1, + aux->mdtruffle.mask2, ptr, end, + aux->mdtruffle.len1, + aux->mdtruffle.len2); + break; case ACCEL_RED_TAPE: ptr = end; /* there is no escape */ offset = aux->generic.offset; diff --git a/src/nfa/multitruffle.c b/src/nfa/multitruffle.c new file mode 100644 index 000000000..3af6394ad --- /dev/null +++ b/src/nfa/multitruffle.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "ue2common.h" + +#include "multitruffle.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/simd_utils_ssse3.h" + +#include "multiaccel_common.h" + +#if !defined(__AVX2__) + +#define MATCH_ALGO long_ +#include "multiaccel_long.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO longgrab_ +#include "multiaccel_longgrab.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shift_ +#include "multiaccel_shift.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shiftgrab_ +#include "multiaccel_shiftgrab.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MULTIACCEL_DOUBLE + +#define MATCH_ALGO doubleshift_ +#include "multiaccel_doubleshift.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO doubleshiftgrab_ +#include "multiaccel_doubleshiftgrab.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#undef MULTIACCEL_DOUBLE + +#else + +#define MATCH_ALGO long_ +#include "multiaccel_long.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO longgrab_ +#include "multiaccel_longgrab.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shift_ +#include "multiaccel_shift.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shiftgrab_ +#include "multiaccel_shiftgrab.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MULTIACCEL_DOUBLE + +#define MATCH_ALGO doubleshift_ +#include "multiaccel_doubleshift.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO doubleshiftgrab_ +#include "multiaccel_doubleshiftgrab.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#undef MULTIACCEL_DOUBLE + +#endif diff --git a/src/nfa/multitruffle.h b/src/nfa/multitruffle.h new file mode 100644 index 000000000..8703b5ca3 --- /dev/null +++ b/src/nfa/multitruffle.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef MULTITRUFFLE_H +#define MULTITRUFFLE_H + +/** \file + * \brief Multitruffle: multibyte version of Truffle. + * + * Utilises the SSSE3 pshufb shuffle instruction + */ + +#include "util/simd_types.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +const u8 *long_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len); + +const u8 *longgrab_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len); + +const u8 *shift_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len); + +const u8 *shiftgrab_truffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *doubleshift_truffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end, const u8 run_len, + const u8 run2_len); + +const u8 *doubleshiftgrab_truffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end, const u8 run_len, + const u8 run2_len); + +#ifdef __cplusplus +} +#endif + + +#endif /* MULTITRUFFLE_H */ diff --git a/src/nfa/multitruffle_avx2.h b/src/nfa/multitruffle_avx2.h new file mode 100644 index 000000000..e52db5fc9 --- /dev/null +++ b/src/nfa/multitruffle_avx2.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * Matches a byte in a charclass using three shuffles + */ + +#include "config.h" +#include "ue2common.h" +#include "multiaccel_common.h" + +/* + * include "block" function + */ +#include "truffle_common.h" + +/* + * single-byte truffle fwd match function, should only be defined when not + * compiling multiaccel + */ +static really_inline +const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + m256 v, const u8 *buf, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + u64a z = (u64a) block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, z ^ 0xFFFFFFFF +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); +} + +const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + DEBUG_PRINTF("run_len %zu\n", buf_end - buf); + const m256 wide_clear = set2x128(shuf_mask_lo_highclear); + const m256 wide_set = set2x128(shuf_mask_lo_highset); + + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + if (buf_end - buf < 32) { + return truffleMini(wide_clear, wide_set, buf, buf_end); + } + + size_t min = (size_t)buf % 32; + assert(buf_end - buf >= 32); + + // Preconditioning: most of the time our buffer won't be aligned. + m256 chars = loadu256(buf); + rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + buf += (32 - min); + + const u8 *last_block = buf_end - 32; + while (buf < last_block) { + m256 lchars = load256(buf); + rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, lchars, + buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + buf += 32; + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf_end. + assert(buf <= buf_end && buf >= buf_end - 32); + chars = loadu256(buf_end - 32); + rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, + buf_end - 32, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + + return buf_end; +} diff --git a/src/nfa/multitruffle_sse.h b/src/nfa/multitruffle_sse.h new file mode 100644 index 000000000..b287e4fc4 --- /dev/null +++ b/src/nfa/multitruffle_sse.h @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ue2common.h" +#include "multiaccel_common.h" + +/* + * include "block" function + */ +#include "truffle_common.h" + +/* + * single-byte truffle fwd match function, should only be defined when not + * compiling multiaccel + */ + +static really_inline +const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + m128 v, const u8 *buf, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v) ^ 0xFFFF; + return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); +} + +/* + * 16-byte pipeline, for smaller scans + */ +static +const u8 *JOIN(MATCH_ALGO, trufflePipeline16)(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8* ptr, *last_buf; + u32 last_res; + + // pipeline prologue: scan first 16 bytes + m128 data = load128(buf); + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; + last_buf = buf; + last_res = z; + buf += 16; + + // now, start the pipeline! + assert((size_t)buf % 16 == 0); + for (; buf + 15 < buf_end; buf += 16) { + // scan more data + data = load128(buf); + z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_buf = buf; + last_res = z; + } + assert(buf <= buf_end && buf >= buf_end - 16); + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + return NULL; +} + +/* + * 32-byte pipeline, for bigger scans + */ +static +const u8 *JOIN(MATCH_ALGO, trufflePipeline32)(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8* ptr, *last_buf; + u32 res; + + // pipeline prologue: scan first 32 bytes + m128 data1 = load128(buf); + u32 z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; + m128 data2 = load128(buf + 16); + u32 z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; + + // store the results + u32 last_res = z1 | (z2 << 16); + last_buf = buf; + buf += 32; + + + // now, start the pipeline! 
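+    /* Two-deep pipeline again: compute the match mask for the current
+     * 32 bytes, then run the multiaccel match function over the mask from
+     * the previous iteration, hiding the latency of the pshufb-based
+     * truffle classifier behind the run-length matching. */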
+ assert((size_t)buf % 16 == 0); + for (; buf + 31 < buf_end; buf += 32) { + // scan more data + data1 = load128(buf); + z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; + data2 = load128(buf + 16); + z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; + res = z1 | (z2 << 16); + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_res = res; + last_buf = buf; + } + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + // if we still have some data left, scan it too + for (; buf + 15 < buf_end; buf += 16) { + m128 chars = load128(buf); + ptr = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, + chars, buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + } + assert(buf <= buf_end && buf >= buf_end - 16); + + return NULL; +} + +const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + DEBUG_PRINTF("run_len %zu\n", buf_end - buf); + + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + if (buf_end - buf < 16) { + return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, buf_end); + } + + size_t min = (size_t)buf % 16; + assert(buf_end - buf >= 16); + + // Preconditioning: most of the time our buffer won't be aligned. + m128 chars = loadu128(buf); + rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + buf += (16 - min); + + // if we have enough data, run bigger pipeline; otherwise run smaller one + if (buf_end - buf >= 128) { + rv = JOIN(MATCH_ALGO, trufflePipeline32)(shuf_mask_lo_highclear, shuf_mask_lo_highset, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(rv)) { + return rv; + } + } else if (buf_end - buf >= 16){ + rv = JOIN(MATCH_ALGO, trufflePipeline16)(shuf_mask_lo_highclear, shuf_mask_lo_highset, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(rv)) { + return rv; + } + } + + // Use an unaligned load to mop up the last 16 bytes and get an accurate + // picture to buf_end. 
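+    // (this load may overlap bytes already scanned above; rescanning them
+    // is harmless, as any match in the overlap would have been returned
+    // earlier)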
+ chars = loadu128(buf_end - 16); + rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, + buf_end - 16, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + + return buf_end; +} From 87424713a750feabfa5661c124e94476d769ab9f Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Wed, 9 Dec 2015 13:38:58 +0000 Subject: [PATCH 028/218] Multibyte acceleration compile side --- CMakeLists.txt | 2 + src/nfa/accelcompile.cpp | 274 ++++++++++++++++- src/nfa/accelcompile.h | 10 +- src/nfa/limex_compile.cpp | 54 +++- src/nfa/multiaccel_compilehelper.cpp | 439 +++++++++++++++++++++++++++ src/nfa/multiaccel_compilehelper.h | 75 +++++ src/nfagraph/ng_limex_accel.cpp | 131 ++++++++ src/nfagraph/ng_limex_accel.h | 30 ++ 8 files changed, 1002 insertions(+), 13 deletions(-) create mode 100644 src/nfa/multiaccel_compilehelper.cpp create mode 100644 src/nfa/multiaccel_compilehelper.h diff --git a/CMakeLists.txt b/CMakeLists.txt index db123c1b5..1abab0fe0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -599,6 +599,8 @@ SET (hs_SRCS src/nfa/mpv_internal.h src/nfa/mpvcompile.cpp src/nfa/mpvcompile.h + src/nfa/multiaccel_compilehelper.cpp + src/nfa/multiaccel_compilehelper.h src/nfa/nfa_api.h src/nfa/nfa_api_queue.h src/nfa/nfa_api_util.h diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 2a22716af..5739618a3 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -169,13 +169,285 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_NONE; } +static +void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { + if (info.ma_type == MultibyteAccelInfo::MAT_NONE) { + DEBUG_PRINTF("no multimatch for us :("); + return; + } + + u32 offset = info.multiaccel_offset; + const CharReach &stops = info.multiaccel_stops; + + assert(aux->accel_type == ACCEL_NONE); + if (stops.all()) { + return; + } + + size_t outs = stops.count(); + DEBUG_PRINTF("%zu outs\n", outs); + assert(outs && outs < 256); + + switch (info.ma_type) { + case MultibyteAccelInfo::MAT_LONG: + if (outs == 1) { + aux->accel_type = ACCEL_MLVERM; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first(); + aux->mverm.len = info.ma_len1; + DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); + return; + } + if (outs == 2 && stops.isCaselessChar()) { + aux->accel_type = ACCEL_MLVERM_NOCASE; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first() & CASE_CLEAR; + aux->mverm.len = info.ma_len1; + DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", + aux->verm.c); + return; + } + break; + case MultibyteAccelInfo::MAT_LONGGRAB: + if (outs == 1) { + aux->accel_type = ACCEL_MLGVERM; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first(); + aux->mverm.len = info.ma_len1; + DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); + return; + } + if (outs == 2 && stops.isCaselessChar()) { + aux->accel_type = ACCEL_MLGVERM_NOCASE; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first() & CASE_CLEAR; + aux->mverm.len = info.ma_len1; + DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", + aux->verm.c); + return; + } + break; + case MultibyteAccelInfo::MAT_SHIFT: + if (outs == 1) { + aux->accel_type = ACCEL_MSVERM; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first(); + aux->mverm.len = info.ma_len1; + DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); + return; + } + if (outs == 2 && stops.isCaselessChar()) { + 
aux->accel_type = ACCEL_MSVERM_NOCASE; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first() & CASE_CLEAR; + aux->mverm.len = info.ma_len1; + DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", + aux->verm.c); + return; + } + break; + case MultibyteAccelInfo::MAT_SHIFTGRAB: + if (outs == 1) { + aux->accel_type = ACCEL_MSGVERM; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first(); + aux->mverm.len = info.ma_len1; + DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); + return; + } + if (outs == 2 && stops.isCaselessChar()) { + aux->accel_type = ACCEL_MSGVERM_NOCASE; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first() & CASE_CLEAR; + aux->mverm.len = info.ma_len1; + DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", + aux->verm.c); + return; + } + break; + case MultibyteAccelInfo::MAT_DSHIFT: + if (outs == 1) { + aux->accel_type = ACCEL_MDSVERM; + aux->mdverm.offset = offset; + aux->mdverm.c = stops.find_first(); + aux->mdverm.len1 = info.ma_len1; + aux->mdverm.len2 = info.ma_len2; + DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); + return; + } + if (outs == 2 && stops.isCaselessChar()) { + aux->accel_type = ACCEL_MDSVERM_NOCASE; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first() & CASE_CLEAR; + aux->mdverm.len1 = info.ma_len1; + aux->mdverm.len2 = info.ma_len2; + DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", + aux->verm.c); + return; + } + break; + case MultibyteAccelInfo::MAT_DSHIFTGRAB: + if (outs == 1) { + aux->accel_type = ACCEL_MDSGVERM; + aux->mdverm.offset = offset; + aux->mdverm.c = stops.find_first(); + aux->mdverm.len1 = info.ma_len1; + aux->mdverm.len2 = info.ma_len2; + DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); + return; + } + if (outs == 2 && stops.isCaselessChar()) { + aux->accel_type = ACCEL_MDSGVERM_NOCASE; + aux->mverm.offset = offset; + aux->mverm.c = stops.find_first() & CASE_CLEAR; + aux->mdverm.len1 = info.ma_len1; + aux->mdverm.len2 = info.ma_len2; + DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", + aux->verm.c); + return; + } + break; + default: + // shouldn't happen + assert(0); + return; + } + + DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); + + switch (info.ma_type) { + case MultibyteAccelInfo::MAT_LONG: + if (shuftiBuildMasks(stops, &aux->mshufti.lo, + &aux->mshufti.hi) == -1) { + break; + } + aux->accel_type = ACCEL_MLSHUFTI; + aux->mshufti.offset = offset; + aux->mshufti.len = info.ma_len1; + return; + case MultibyteAccelInfo::MAT_LONGGRAB: + if (shuftiBuildMasks(stops, &aux->mshufti.lo, + &aux->mshufti.hi) == -1) { + break; + } + aux->accel_type = ACCEL_MLGSHUFTI; + aux->mshufti.offset = offset; + aux->mshufti.len = info.ma_len1; + return; + case MultibyteAccelInfo::MAT_SHIFT: + if (shuftiBuildMasks(stops, &aux->mshufti.lo, + &aux->mshufti.hi) == -1) { + break; + } + aux->accel_type = ACCEL_MSSHUFTI; + aux->mshufti.offset = offset; + aux->mshufti.len = info.ma_len1; + return; + case MultibyteAccelInfo::MAT_SHIFTGRAB: + if (shuftiBuildMasks(stops, &aux->mshufti.lo, + &aux->mshufti.hi) == -1) { + break; + } + aux->accel_type = ACCEL_MSGSHUFTI; + aux->mshufti.offset = offset; + aux->mshufti.len = info.ma_len1; + return; + case MultibyteAccelInfo::MAT_DSHIFT: + if (shuftiBuildMasks(stops, &aux->mdshufti.lo, + &aux->mdshufti.hi) == -1) { + break; + } + aux->accel_type = ACCEL_MDSSHUFTI; + aux->mdshufti.offset = offset; + aux->mdshufti.len1 = info.ma_len1; + aux->mdshufti.len2 = 
info.ma_len2; + return; + case MultibyteAccelInfo::MAT_DSHIFTGRAB: + if (shuftiBuildMasks(stops, &aux->mdshufti.lo, + &aux->mdshufti.hi) == -1) { + break; + } + aux->accel_type = ACCEL_MDSGSHUFTI; + aux->mdshufti.offset = offset; + aux->mdshufti.len1 = info.ma_len1; + aux->mdshufti.len2 = info.ma_len2; + return; + default: + // shouldn't happen + assert(0); + return; + } + DEBUG_PRINTF("shufti build failed, falling through\n"); + + if (outs <= ACCEL_MAX_STOP_CHAR) { + DEBUG_PRINTF("building Truffle for %zu chars\n", outs); + switch (info.ma_type) { + case MultibyteAccelInfo::MAT_LONG: + aux->accel_type = ACCEL_MLTRUFFLE; + aux->mtruffle.offset = offset; + aux->mtruffle.len = info.ma_len1; + truffleBuildMasks(stops, &aux->mtruffle.mask1, + &aux->mtruffle.mask2); + break; + case MultibyteAccelInfo::MAT_LONGGRAB: + aux->accel_type = ACCEL_MLGTRUFFLE; + aux->mtruffle.offset = offset; + aux->mtruffle.len = info.ma_len1; + truffleBuildMasks(stops, &aux->mtruffle.mask1, + &aux->mtruffle.mask2); + break; + case MultibyteAccelInfo::MAT_SHIFT: + aux->accel_type = ACCEL_MSTRUFFLE; + aux->mtruffle.offset = offset; + aux->mtruffle.len = info.ma_len1; + truffleBuildMasks(stops, &aux->mtruffle.mask1, + &aux->mtruffle.mask2); + break; + case MultibyteAccelInfo::MAT_SHIFTGRAB: + aux->accel_type = ACCEL_MSGTRUFFLE; + aux->mtruffle.offset = offset; + aux->mtruffle.len = info.ma_len1; + truffleBuildMasks(stops, &aux->mtruffle.mask1, + &aux->mtruffle.mask2); + break; + case MultibyteAccelInfo::MAT_DSHIFT: + aux->accel_type = ACCEL_MDSTRUFFLE; + aux->mdtruffle.offset = offset; + aux->mdtruffle.len1 = info.ma_len1; + aux->mdtruffle.len2 = info.ma_len2; + truffleBuildMasks(stops, &aux->mtruffle.mask1, + &aux->mdtruffle.mask2); + break; + case MultibyteAccelInfo::MAT_DSHIFTGRAB: + aux->accel_type = ACCEL_MDSGTRUFFLE; + aux->mdtruffle.offset = offset; + aux->mdtruffle.len1 = info.ma_len1; + aux->mdtruffle.len2 = info.ma_len2; + truffleBuildMasks(stops, &aux->mtruffle.mask1, + &aux->mdtruffle.mask2); + break; + default: + // shouldn't happen + assert(0); + return; + } + return; + } + + DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs); +} + bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { assert(aux->accel_type == ACCEL_NONE); if (info.single_stops.none()) { DEBUG_PRINTF("picked red tape\n"); aux->accel_type = ACCEL_RED_TAPE; aux->generic.offset = info.single_offset; - } else { + } + if (aux->accel_type == ACCEL_NONE) { + buildAccelMulti(info, aux); + } + if (aux->accel_type == ACCEL_NONE) { buildAccelDouble(info, aux); } if (aux->accel_type == ACCEL_NONE) { diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h index 12af559ce..e9467531d 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -32,6 +32,7 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/ue2_containers.h" +#include "nfagraph/ng_limex_accel.h" union AccelAux; @@ -39,7 +40,9 @@ namespace ue2 { struct AccelInfo { AccelInfo() : single_offset(0U), double_offset(0U), - single_stops(CharReach::dot()) {} + single_stops(CharReach::dot()), + multiaccel_offset(0), ma_len1(0), ma_len2(0), + ma_type(MultibyteAccelInfo::MAT_NONE) {} u32 single_offset; /**< offset correction to apply to single schemes */ u32 double_offset; /**< offset correction to apply to double schemes */ CharReach double_stop1; /**< single-byte accel stop literals for double @@ -47,6 +50,11 @@ struct AccelInfo { flat_set> double_stop2; /**< double-byte accel stop * literals */ CharReach single_stops; /**< escapes for single 
byte acceleration */ + u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */ + CharReach multiaccel_stops; /**< escapes for multibyte acceleration */ + u32 ma_len1; /**< multiaccel len1 */ + u32 ma_len2; /**< multiaccel len2 */ + MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */ }; bool buildAccelAux(const AccelInfo &info, AccelAux *aux); diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index a6c34cb66..d3e1a8ee0 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -80,9 +80,11 @@ struct precalcAccel { CharReach double_cr; flat_set> double_lits; /* double-byte accel stop literals */ u32 double_offset; + + MultibyteAccelInfo ma_info; }; -struct meteor_accel_info { +struct limex_accel_info { ue2::unordered_set accelerable; map precalc; ue2::unordered_map > friends; @@ -162,7 +164,7 @@ struct build_info { bool stateCompression; const CompileContext &cc; u32 num_states; - meteor_accel_info accel; + limex_accel_info accel; }; // Constants for scoring mechanism @@ -334,12 +336,16 @@ void buildReachMapping(const build_info &args, vector &reach, } struct AccelBuild { - AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0) {} + AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0), + ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {} NFAVertex v; u32 state; u32 offset; // offset correction to apply CharReach stop1; // single-byte accel stop literals flat_set> stop2; // double-byte accel stop literals + u32 ma_len1; // multiaccel len1 + u32 ma_len2; // multiaccel len2 + MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type }; static @@ -354,7 +360,12 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) { build.stop1 = CharReach::dot(); } else { const precalcAccel &precalc = bi.accel.precalc.at(ss); - if (precalc.double_lits.empty()) { + unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2; + if (ma_len >= MULTIACCEL_MIN_LEN) { + build.ma_len1 = precalc.ma_info.len1; + build.stop1 = precalc.ma_info.cr; + build.offset = precalc.ma_info.offset; + } else if (precalc.double_lits.empty()) { build.stop1 = precalc.single_cr; build.offset = precalc.single_offset; } else { @@ -534,7 +545,7 @@ void filterAccelStates(NGHolder &g, const map &tops, } static -bool containsBadSubset(const meteor_accel_info &accel, +bool containsBadSubset(const limex_accel_info &accel, const NFAStateSet &state_set, const u32 effective_sds) { NFAStateSet subset(state_set.size()); for (size_t j = state_set.find_first(); j != state_set.npos; @@ -559,7 +570,8 @@ void doAccelCommon(NGHolder &g, ue2::unordered_map &accel_map, const ue2::unordered_map &state_ids, const map &br_cyclic, - const u32 num_states, meteor_accel_info *accel) { + const u32 num_states, limex_accel_info *accel, + const CompileContext &cc) { vector refined_cr = reduced_cr(g, br_cyclic); vector astates; @@ -607,10 +619,22 @@ void doAccelCommon(NGHolder &g, DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset); + // try multibyte acceleration first + MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc); + precalcAccel &pa = accel->precalc[state_set]; + useful |= state_set; + + // if we successfully built a multibyte accel scheme, use that + if (mai.type != MultibyteAccelInfo::MAT_NONE) { + pa.ma_info = mai; + + DEBUG_PRINTF("multibyte acceleration!\n"); + continue; + } + pa.single_offset = as.offset; pa.single_cr = as.cr; - useful |= state_set; if (states.size() == 1) { DoubleAccelInfo b = 
findBestDoubleAccelInfo(g, states.front()); @@ -660,7 +684,7 @@ void fillAccelInfo(build_info &bi) { filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map); assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES); doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic, - bi.num_states, &bi.accel); + bi.num_states, &bi.accel, bi.cc); } /** The AccelAux structure has large alignment specified, and this makes some @@ -672,7 +696,7 @@ static void buildAccel(const build_info &args, NFAStateSet &accelMask, NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec, vector &accelTable) { - const meteor_accel_info &accel = args.accel; + const limex_accel_info &accel = args.accel; // Init, all zeroes. accelMask.resize(args.num_states); @@ -737,8 +761,16 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, if (contains(accel.precalc, states)) { const precalcAccel &precalc = accel.precalc.at(states); - ainfo.single_offset = precalc.single_offset; - ainfo.single_stops = precalc.single_cr; + if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) { + ainfo.ma_len1 = precalc.ma_info.len1; + ainfo.ma_len2 = precalc.ma_info.len2; + ainfo.multiaccel_offset = precalc.ma_info.offset; + ainfo.multiaccel_stops = precalc.ma_info.cr; + ainfo.ma_type = precalc.ma_info.type; + } else { + ainfo.single_offset = precalc.single_offset; + ainfo.single_stops = precalc.single_cr; + } } buildAccelAux(ainfo, &aux); diff --git a/src/nfa/multiaccel_compilehelper.cpp b/src/nfa/multiaccel_compilehelper.cpp new file mode 100644 index 000000000..f1cf2a4c1 --- /dev/null +++ b/src/nfa/multiaccel_compilehelper.cpp @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "multiaccel_compilehelper.h" + +using namespace std; +using namespace ue2; + +#ifdef DEBUG +static const char* state_to_str[] = { + "FIRST_RUN", + "SECOND_RUN", + "WAITING_FOR_GRAB", + "FIRST_TAIL", + "SECOND_TAIL", + "STOPPED", + "INVALID" +}; +static const char* type_to_str[] = { + "SHIFT", + "SHIFTGRAB", + "DOUBLESHIFT", + "DOUBLESHIFTGRAB", + "LONG", + "LONGGRAB", + "NONE" +}; + +static +void dumpMultiaccelState(const accel_data &d) { + DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n", + type_to_str[(unsigned) d.type], + state_to_str[(unsigned) d.state], + d.len1, d.tlen1, d.len2, d.tlen2); +} +#endif + +/* stop all the matching. this may render most schemes invalid. */ +static +void stop(accel_data &d) { + switch (d.state) { + case STATE_STOPPED: + case STATE_INVALID: + break; + case STATE_FIRST_TAIL: + case STATE_SECOND_RUN: + /* + * Shift matchers are special case, because they have "tails". + * When shift matcher reaches a mid/endpoint, tail mode is + * activated, which looks for more matches to extend the match. + * + * For example, consider pattern /a{5}ba{3}/. Under normal circumstances, + * long-grab matcher will be picked for this pattern (matching a run of a's, + * followed by a not-a), because doubleshift matcher would be confused by + * consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts + * by 1) and throw out the rest of the pattern. + * + * With tails, we defer ending the run until we actually run out of + * matching characters, so the above pattern will now be parsed by + * doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4). + * + * So if we are stopping shift matchers, we should check if we aren't in + * the process of matching first tail or second run. If we are, we can't + * finish the second run as we are stopping, but we can try and split + * the first tail instead to obtain a valid second run. + */ + if ((d.type == MultibyteAccelInfo::MAT_DSHIFT || + d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) { + // can't split an empty void... 
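+            // (an empty first tail leaves nothing to promote into a second
+            // run, so the doubleshift scheme cannot be salvaged)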
+ d.state = STATE_INVALID; + break; + } + d.len2 = 0; + d.state = STATE_STOPPED; + break; + case STATE_SECOND_TAIL: + d.state = STATE_STOPPED; + break; + case STATE_WAITING_FOR_GRAB: + case STATE_FIRST_RUN: + if (d.type == MultibyteAccelInfo::MAT_LONG) { + d.state = STATE_STOPPED; + } else { + d.state = STATE_INVALID; + } + break; + } +} + +static +void validate(accel_data &d, unsigned max_len) { + // try and fit in all our tails + if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) { + // case 1: everything fits in + d.len1 += d.tlen1; + d.len2 += d.tlen2; + d.tlen1 = 0; + d.tlen2 = 0; + } else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) { + // case 2: everything but the second tail fits in + d.len1 += d.tlen1; + d.tlen1 = 0; + // try going for a partial tail + if (d.tlen2 != 0) { + int new_tlen2 = max_len - 1 - d.len1 - d.len2; + if (new_tlen2 > 0) { + d.len2 += new_tlen2; + } + d.tlen2 = 0; + } + } else if (d.len1 + d.tlen1 < max_len) { + // case 3: first run and its tail fits in + if (d.type == MultibyteAccelInfo::MAT_DSHIFT || + d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) { + // split the tail into a second run + d.len2 = d.tlen1; + } else { + d.len1 += d.tlen1; + d.len2 = 0; + } + d.tlen1 = 0; + d.tlen2 = 0; + } else if (d.len1 < max_len) { + // case 4: nothing but the first run fits in + // try going for a partial tail + if (d.tlen1 != 0) { + int new_tlen1 = max_len - 1 - d.len1; + if (new_tlen1 > 0) { + d.len1 += new_tlen1; + } + d.tlen1 = 0; + } + d.len2 = 0; + d.tlen2 = 0; + } + // if we removed our second run, doubleshift matchers are no longer valid + if ((d.type == MultibyteAccelInfo::MAT_DSHIFT || + d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) { + d.state = STATE_INVALID; + } else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) { + // long matchers can just stop whenever they want to + d.len1 = max_len - 1; + } + + // now, general sanity checks + if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) { + d.state = STATE_INVALID; + } + if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) { + d.state = STATE_INVALID; + } +} + +static +void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) { + switch (d.type) { + case MultibyteAccelInfo::MAT_LONG: + { + /* + * For long matcher, we want lots of consecutive same-or-subset + * char-reaches + */ + if ((ref_cr & cur_cr) == cur_cr) { + d.len1++; + } else { + d.state = STATE_STOPPED; + } + } + break; + + case MultibyteAccelInfo::MAT_LONGGRAB: + { + /* + * For long-grab matcher, we want lots of consecutive same-or-subset + * char-reaches with a negative match in the end. + */ + if ((ref_cr & cur_cr) == cur_cr) { + d.len1++; + } else if (!(ref_cr & cur_cr).any()) { + /* we grabbed, stop immediately */ + d.state = STATE_STOPPED; + } else { + /* our run-n-grab was interrupted; mark as invalid */ + d.state = STATE_INVALID; + } + } + break; + + case MultibyteAccelInfo::MAT_SHIFTGRAB: + { + /* + * For shift-grab matcher, we want two matches separated by anything; + * however the second vertex *must* be a negative (non-overlapping) match. + * + * Shiftgrab matcher is identical to shift except for presence of grab. + */ + if (d.state == STATE_WAITING_FOR_GRAB) { + if ((ref_cr & cur_cr).any()) { + d.state = STATE_INVALID; + } else { + d.state = STATE_FIRST_RUN; + d.len1++; + } + return; + } + } + /* no break, falling through */ + case MultibyteAccelInfo::MAT_SHIFT: + { + /* + * For shift-matcher, we want two matches separated by anything. 
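+         *
+         * For example (by analogy with the doubleshift example in stop()
+         * above), a fragment like /a.{2}a/ is a single shift by 3: a
+         * match, followed by another match three bytes later.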
+ */ + if (ref_cr == cur_cr) { + // keep matching tail + switch (d.state) { + case STATE_FIRST_RUN: + d.state = STATE_FIRST_TAIL; + break; + case STATE_FIRST_TAIL: + d.tlen1++; + break; + default: + // shouldn't happen + assert(0); + } + } else { + switch (d.state) { + case STATE_FIRST_RUN: + // simply advance + d.len1++; + break; + case STATE_FIRST_TAIL: + // we found a non-matching char after tail, so stop + d.state = STATE_STOPPED; + break; + default: + // shouldn't happen + assert(0); + } + } + } + break; + + case MultibyteAccelInfo::MAT_DSHIFTGRAB: + { + /* + * For double shift-grab matcher, we want two matches separated by + * either negative matches or dots; however the second vertex *must* + * be a negative match. + * + * Doubleshiftgrab matcher is identical to doubleshift except for + * presence of grab. + */ + if (d.state == STATE_WAITING_FOR_GRAB) { + if ((ref_cr & cur_cr).any()) { + d.state = STATE_INVALID; + } else { + d.state = STATE_FIRST_RUN; + d.len1++; + } + return; + } + } + /* no break, falling through */ + case MultibyteAccelInfo::MAT_DSHIFT: + { + /* + * For double shift matcher, we want three matches, each separated + * by a lot of anything. + * + * Doubleshift matcher is complicated by presence of tails. + */ + if (ref_cr == cur_cr) { + // decide if we are activating second shift or matching tails + switch (d.state) { + case STATE_FIRST_RUN: + d.state = STATE_FIRST_TAIL; + d.len2 = 1; // we're now ready for our second run + break; + case STATE_FIRST_TAIL: + d.tlen1++; + break; + case STATE_SECOND_RUN: + d.state = STATE_SECOND_TAIL; + break; + case STATE_SECOND_TAIL: + d.tlen2++; + break; + default: + // shouldn't happen + assert(0); + } + } else { + switch (d.state) { + case STATE_FIRST_RUN: + d.len1++; + break; + case STATE_FIRST_TAIL: + // start second run + d.state = STATE_SECOND_RUN; + d.len2++; + break; + case STATE_SECOND_RUN: + d.len2++; + break; + case STATE_SECOND_TAIL: + // stop + d.state = STATE_STOPPED; + break; + default: + // shouldn't happen + assert(0); + } + } + } + break; + + default: + // shouldn't happen + assert(0); + break; + } +} + +MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, u32 off, + unsigned max_len) : + cr(ref_cr), offset(off), max_len(max_len) { + int accel_num = (int) MultibyteAccelInfo::MAT_MAX; + accels.resize(accel_num); + + // mark everything as valid + for (int i = 0; i < accel_num; i++) { + accel_data &ad = accels[i]; + ad.len1 = 1; + ad.type = (MultibyteAccelInfo::multiaccel_type) i; + + /* for shift-grab matchers, we are waiting for the grab right at the start */ + if (ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB + || ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) { + ad.state = STATE_WAITING_FOR_GRAB; + } else { + ad.state = STATE_FIRST_RUN; + } + } +} + +bool MultiaccelCompileHelper::canAdvance() { + for (const accel_data &ad : accels) { + if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) { + return true; + } + } + return false; +} + +void MultiaccelCompileHelper::advance(const CharReach &cur_cr) { + for (accel_data &ad : accels) { + if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) { + continue; + } + match(ad, cr, cur_cr); +#ifdef DEBUG + dumpMultiaccelState(ad); +#endif + } +} + +MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() { + int best_len = 0; + accel_data best; + + DEBUG_PRINTF("Stopping multiaccel compile\n"); + + for (accel_data &ad : accels) { + // stop our matching + stop(ad); + validate(ad, max_len); + +#ifdef DEBUG + dumpMultiaccelState(ad); +#endif + 
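+        // (stop() above finalises any in-flight run, and validate() folds
+        // the tails back into the run lengths and applies the length
+        // limits; schemes that cannot be repaired end up STATE_INVALID)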
+ // skip invalid schemes + if (ad.state == STATE_INVALID) { + continue; + } + DEBUG_PRINTF("Marking as viable\n"); + + // TODO: relative strengths of accel schemes? maybe e.g. a shorter + // long match would in some cases be preferable to a longer + // double shift match (for example, depending on length)? + int as_len = ad.len1 + ad.len2; + if (as_len >= best_len) { + DEBUG_PRINTF("Marking as best\n"); + best_len = as_len; + best = ad; + } + } + // if we found at least one accel scheme, return it + if (best.state != STATE_INVALID) { +#ifdef DEBUG + DEBUG_PRINTF("Picked best multiaccel state:\n"); + dumpMultiaccelState(best); +#endif + MultibyteAccelInfo info; + info.cr = cr; + info.offset = offset; + info.len1 = best.len1; + info.len2 = best.len2; + info.type = best.type; + return info; + } + return MultibyteAccelInfo(); +} diff --git a/src/nfa/multiaccel_compilehelper.h b/src/nfa/multiaccel_compilehelper.h new file mode 100644 index 000000000..27dbe634a --- /dev/null +++ b/src/nfa/multiaccel_compilehelper.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef MULTIACCELCOMPILE_H_
+#define MULTIACCELCOMPILE_H_
+
+#include "ue2common.h"
+
+#include "nfagraph/ng_limex_accel.h"
+
+#include <vector>
+
+namespace ue2 {
+
+/* accel scheme state machine */
+enum accel_scheme_state {
+    STATE_FIRST_RUN,
+    STATE_SECOND_RUN,
+    STATE_WAITING_FOR_GRAB,
+    STATE_FIRST_TAIL,
+    STATE_SECOND_TAIL,
+    STATE_STOPPED,
+    STATE_INVALID
+};
+
+struct accel_data {
+    MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE;
+    accel_scheme_state state = STATE_INVALID;
+    unsigned len1 = 0; /* length of first run */
+    unsigned len2 = 0; /* length of second run, if present */
+    unsigned tlen1 = 0; /* first tail length */
+    unsigned tlen2 = 0; /* second tail length */
+};
+
+class MultiaccelCompileHelper {
+private:
+    const CharReach &cr;
+    u32 offset;
+    std::vector<accel_data> accels;
+    unsigned max_len;
+public:
+    MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len);
+    bool canAdvance();
+    MultibyteAccelInfo getBestScheme();
+    void advance(const ue2::CharReach &cr);
+};
+
+}; // namespace
+
+#endif /* MULTIACCELCOMPILE_H_ */
diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp
index da103f8d6..ed9f5bfe9 100644
--- a/src/nfagraph/ng_limex_accel.cpp
+++ b/src/nfagraph/ng_limex_accel.cpp
@@ -37,12 +37,15 @@
 #include "ue2common.h"
 
 #include "nfa/accel.h"
+#include "nfa/multiaccel_compilehelper.h"
 
 #include "util/bitutils.h" // for CASE_CLEAR
 #include "util/charreach.h"
+#include "util/compile_context.h"
 #include "util/container.h"
 #include "util/dump_charclass.h"
 #include "util/graph_range.h"
+#include "util/target_info.h"
 
 #include 
 #include 
@@ -647,6 +650,134 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
     return g.startDs;
 }
 
+static
+NFAVertex find_next(const NFAVertex v, const NGHolder &g) {
+    NFAVertex res = NFAGraph::null_vertex();
+    for (NFAVertex u : adjacent_vertices_range(v, g)) {
+        if (u != v) {
+            res = u;
+            break;
+        }
+    }
+    return res;
+}
+
+/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */
+MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
+                                      const vector<NFAVertex> &states,
+                                      const CompileContext &cc) {
+    // For a set of states to be accelerable, we basically have to have only
+    // one state to accelerate.
+    if (states.size() != 1) {
+        DEBUG_PRINTF("can't accelerate multiple states\n");
+        return MultibyteAccelInfo();
+    }
+
+    // Get our base vertex
+    NFAVertex v = states[0];
+
+    // We need the base vertex to be a self-looping dotall leading to exactly
+    // one vertex.
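+    // (in effect, the pattern begins with a floating /.*/ feeding a fixed
+    // sequence of vertices that we can scan across)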
+    if (!hasSelfLoop(v, g)) {
+        DEBUG_PRINTF("base vertex has no self-loop\n");
+        return MultibyteAccelInfo();
+    }
+
+    if (!g[v].char_reach.all()) {
+        DEBUG_PRINTF("can't accelerate anything but dot\n");
+        return MultibyteAccelInfo();
+    }
+
+    if (proper_out_degree(v, g) != 1) {
+        DEBUG_PRINTF("can't accelerate states with multiple successors\n");
+        return MultibyteAccelInfo();
+    }
+
+    // find our start vertex
+    NFAVertex cur = find_next(v, g);
+    if (cur == NFAGraph::null_vertex()) {
+        DEBUG_PRINTF("invalid start vertex\n");
+        return MultibyteAccelInfo();
+    }
+
+    bool has_offset = false;
+    u32 offset = 0;
+    CharReach cr = g[cur].char_reach;
+
+    // if we start with a dot, we have an offset, so defer figuring out the
+    // real CharReach for this accel scheme
+    if (cr == CharReach::dot()) {
+        has_offset = true;
+        offset = 1;
+    }
+
+    // figure out our offset
+    while (has_offset) {
+        // vertices have to have no self loops
+        if (hasSelfLoop(cur, g)) {
+            DEBUG_PRINTF("can't have self-loops\n");
+            return MultibyteAccelInfo();
+        }
+
+        // we have to have exactly 1 successor to have this acceleration scheme
+        if (out_degree(cur, g) != 1) {
+            DEBUG_PRINTF("can't have multiple successors\n");
+            return MultibyteAccelInfo();
+        }
+
+        cur = *adjacent_vertices(cur, g).first;
+
+        // if we met a special vertex, bail out
+        if (is_special(cur, g)) {
+            DEBUG_PRINTF("can't have special vertices\n");
+            return MultibyteAccelInfo();
+        }
+
+        // now, get the real char reach
+        if (g[cur].char_reach != CharReach::dot()) {
+            cr = g[cur].char_reach;
+            has_offset = false;
+        } else {
+            offset++;
+        }
+    }
+
+    // now, fire up the compilation machinery
+    target_t ti = cc.target_info;
+    unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE;
+    MultiaccelCompileHelper mac(cr, offset, max_len);
+
+    while (mac.canAdvance()) {
+        // vertices have to have no self loops
+        if (hasSelfLoop(cur, g)) {
+            break;
+        }
+
+        // we have to have exactly 1 successor to have this acceleration scheme
+        if (out_degree(cur, g) != 1) {
+            break;
+        }
+
+        cur = *adjacent_vertices(cur, g).first;
+
+        // if we met a special vertex, bail out
+        if (is_special(cur, g)) {
+            break;
+        }
+
+        mac.advance(g[cur].char_reach);
+    }
+    MultibyteAccelInfo mai = mac.getBestScheme();
+#ifdef DEBUG
+    DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n",
+                 mai.type, mai.offset, mai.len1, mai.len2);
+    for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) {
+        DEBUG_PRINTF("multibyte accel char: %zu\n", c);
+    }
+#endif
+    return mai;
+}
+
 /** \brief Check if vertex \a v is an accelerable state (for a limex NFA).
*/ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const vector &refined_cr, diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index 005eddd22..61dfaed99 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -50,6 +50,12 @@ namespace ue2 { #define MAX_MERGED_ACCEL_STOPS 200 #define ACCEL_MAX_STOP_CHAR 24 #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ +#define MULTIACCEL_MIN_LEN 3 +#define MULTIACCEL_MAX_LEN_SSE 15 +#define MULTIACCEL_MAX_LEN_AVX2 31 + +// forward-declaration of CompileContext +struct CompileContext; void findAccelFriends(const NGHolder &g, NFAVertex v, const std::map &br_cyclic, @@ -65,6 +71,25 @@ struct DoubleAccelInfo { DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v); +struct MultibyteAccelInfo { + /* multibyte accel schemes, ordered by strength */ + enum multiaccel_type { + MAT_SHIFT, + MAT_SHIFTGRAB, + MAT_DSHIFT, + MAT_DSHIFTGRAB, + MAT_LONG, + MAT_LONGGRAB, + MAT_MAX, + MAT_NONE = MAT_MAX + }; + CharReach cr; + u32 offset = 0; + u32 len1 = 0; + u32 len2 = 0; + multiaccel_type type = MAT_NONE; +}; + struct AccelScheme { AccelScheme(const CharReach &cr_in, u32 offset_in) : cr(cr_in), offset(offset_in) { @@ -109,6 +134,11 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::map &br_cyclic, AccelScheme *as, bool allow_wide); +/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */ +MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g, + const std::vector &verts, + const CompileContext &cc); + } // namespace ue2 #endif From e6709cee5fcc3e1e49b70da5b9ff6439a8fc81e2 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Wed, 9 Dec 2015 11:24:16 +0000 Subject: [PATCH 029/218] Bitmatcher unit-tests --- unit/CMakeLists.txt | 1 + unit/internal/multiaccel_shift.cpp | 81 ++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 unit/internal/multiaccel_shift.cpp diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index c58f64f24..513831b37 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -47,6 +47,7 @@ set(unit_internal_SOURCES internal/limex_nfa.cpp internal/masked_move.cpp internal/multi_bit.cpp + internal/multiaccel_shift.cpp internal/nfagraph_common.h internal/nfagraph_comp.cpp internal/nfagraph_equivalence.cpp diff --git a/unit/internal/multiaccel_shift.cpp b/unit/internal/multiaccel_shift.cpp new file mode 100644 index 000000000..d6019870d --- /dev/null +++ b/unit/internal/multiaccel_shift.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "src/ue2common.h" + +#include "gtest/gtest.h" +#include "nfa/multiaccel_common.h" + +/* + * Unit tests for the shifters. + * + * This is a bit messy, as shifters are macros, so we're using macros to test + * other macros. + */ + +#define TEST_SHIFT(n) \ + do { \ + u64a val = ((u64a) 1 << n) - 1; \ + JOIN(SHIFT, n)(val); \ + ASSERT_EQ(val, 1); \ + } while (0) + +TEST(MultiaccelShift, StaticShift) { + TEST_SHIFT(1); + TEST_SHIFT(2); + TEST_SHIFT(3); + TEST_SHIFT(4); + TEST_SHIFT(5); + TEST_SHIFT(6); + TEST_SHIFT(7); + TEST_SHIFT(8); + TEST_SHIFT(10); + TEST_SHIFT(11); + TEST_SHIFT(12); + TEST_SHIFT(13); + TEST_SHIFT(14); + TEST_SHIFT(15); + TEST_SHIFT(16); + TEST_SHIFT(17); + TEST_SHIFT(18); + TEST_SHIFT(19); + TEST_SHIFT(20); + TEST_SHIFT(21); + TEST_SHIFT(22); + TEST_SHIFT(23); + TEST_SHIFT(24); + TEST_SHIFT(25); + TEST_SHIFT(26); + TEST_SHIFT(27); + TEST_SHIFT(28); + TEST_SHIFT(29); + TEST_SHIFT(30); + TEST_SHIFT(31); + TEST_SHIFT(32); +} From fb932616cabab285c681f6ea26fc390945e8e756 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Wed, 9 Dec 2015 13:39:16 +0000 Subject: [PATCH 030/218] Multibyte matcher unit-tests --- unit/CMakeLists.txt | 1 + unit/internal/multiaccel_matcher.cpp | 289 +++++++++++++++++++++++++++ 2 files changed, 290 insertions(+) create mode 100644 unit/internal/multiaccel_matcher.cpp diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 513831b37..9afc95731 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -47,6 +47,7 @@ set(unit_internal_SOURCES internal/limex_nfa.cpp internal/masked_move.cpp internal/multi_bit.cpp + internal/multiaccel_matcher.cpp internal/multiaccel_shift.cpp internal/nfagraph_common.h internal/nfagraph_comp.cpp diff --git a/unit/internal/multiaccel_matcher.cpp b/unit/internal/multiaccel_matcher.cpp new file mode 100644 index 000000000..1e689430a --- /dev/null +++ b/unit/internal/multiaccel_matcher.cpp @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +extern "C" { +#include "nfa/accel.h" // wrapping in extern C to make sure run_accel works +} + +#include "config.h" +#include "src/ue2common.h" + +#include "gtest/gtest.h" +#include "nfagraph/ng_limex_accel.h" +#include "nfa/accelcompile.h" +#include "nfa/multivermicelli.h" +#include "nfa/multishufti.h" +#include "nfa/multitruffle.h" +#include "util/charreach.h" + +#include +#include +#include +#include +#include + +using namespace ue2; +using namespace std; +using namespace testing; + +/* + * Static functions needed for this test's wellbeing + */ + +// char generator +static inline +char getChar(const CharReach &cr, bool match) { + char result; + do { + result = rand() % CharReach::npos; + } while (cr.test(result) != match); + return result; +} + +// appends a string with matches/unmatches according to input match pattern +static +void getMatch(vector &result, const string &pattern, const CharReach &cr) { + for (const auto &c : pattern) { + result.push_back(getChar(cr, c == '1')); + } +} + +// appends non-matching noise of certain lengths +static +void getNoise(vector &result, u32 len, const CharReach &cr) { + for (unsigned i = 0; i < len; i++) { + result.push_back(getChar(cr, false)); + } +} + +// test parameters structure +struct MultiaccelTestParam { + string match_pattern; + u32 match_pattern_start_idx; + u32 match_idx; + bool test_all_offsets; + u8 match_len1; + u8 match_len2; + MultibyteAccelInfo::multiaccel_type type; +}; + +// buffer size is constant +static const u32 BUF_SIZE = 200; + +// strings, out of which CharReach will be generated +static const string VERM_CR = "a"; +static const string V_NC_CR = "aA"; +static const string SHUF_CR = "abcdefghijklmnopqrstuvwxyz"; +static const string TRUF_CR = "\x11\x22\x33\x44\x55\x66\x77\x88\x99"; + +// Parameterized test case for multiaccel patterns. +class MultiaccelTest : public TestWithParam { +protected: + virtual void SetUp() { + // set up is deferred until the actual test, since we can't compile + // any accel schemes unless we know CharReach + const MultiaccelTestParam &p = GetParam(); + + // reserve space in our buffer + buffer.reserve(BUF_SIZE); + + // store the index where we expect to see the match. note that it may + // be different from where the match pattern has started since we may + // have a flooded match (i.e. a match preceded by almost-match) or a + // no-match (in which case "match" index is at the end of the buffer). + match_idx = p.match_idx; + + // make note if we need to test all offsets - sometimes we don't, for + // example when testing partial or no-match. 
+ test_all_offsets = p.test_all_offsets; + } + + // deferred buffer generation, as we don't know CharReach before we run the test + void GenerateBuffer(const CharReach &cr) { + const MultiaccelTestParam &p = GetParam(); + + // step 1: fill prefix with non-matching noise + getNoise(buffer, p.match_pattern_start_idx, cr); + + // step 2: add a match + getMatch(buffer, p.match_pattern, cr); + + // step 3: fill in the rest of the buffer with non-matching noise + getNoise(buffer, BUF_SIZE - p.match_pattern.size() - + p.match_pattern_start_idx, cr); + } + + // deferred accel scheme generation, as we don't know CharReach before we run the test + void CompileAccelScheme(const CharReach &cr, AccelAux *aux) { + const MultiaccelTestParam &p = GetParam(); + + AccelInfo ai; + ai.single_stops = cr; // dummy CharReach to prevent red tape accel + ai.ma_len1 = p.match_len1; + ai.ma_len2 = p.match_len2; + ai.multiaccel_stops = cr; + ai.ma_type = p.type; + + buildAccelAux(ai, aux); + + // now, verify we've successfully built our accel scheme, *and* that it's + // a multibyte scheme + ASSERT_TRUE(aux->accel_type >= ACCEL_MLVERM && + aux->accel_type <= ACCEL_MDSGTRUFFLE); + } + + virtual void TearDown() { + } + + u32 match_idx; + vector buffer; + bool test_all_offsets; +}; + +static +void runTest(const vector &buffer, AccelAux *aux, unsigned match_idx, + bool test_all_offsets) { + const u8 *start = buffer.data(); + const u8 *end = start + buffer.size(); + const u8 *match = start + match_idx; + + // comparing indexes into the buffer is easier to understand than pointers + if (test_all_offsets) { + // run_accel can only scan >15 byte buffers + u32 end_offset = min(match_idx, (u32) buffer.size() - 15); + + for (unsigned offset = 0; offset < end_offset; offset++) { + const u8 *ptr = run_accel(aux, (start + offset), end); + unsigned idx = ptr - start; + ASSERT_EQ(match_idx, idx); + } + } else { + const u8 *ptr = run_accel(aux, start, end); + unsigned idx = ptr - start; + ASSERT_EQ(match_idx, idx); + } +} + +TEST_P(MultiaccelTest, TestVermicelli) { + AccelAux aux = {0}; + CharReach cr(VERM_CR); + + GenerateBuffer(cr); + + CompileAccelScheme(cr, &aux); + + runTest(buffer, &aux, match_idx, test_all_offsets); +} + +TEST_P(MultiaccelTest, TestVermicelliNocase) { + AccelAux aux = {0}; + CharReach cr(V_NC_CR); + + GenerateBuffer(cr); + + CompileAccelScheme(cr, &aux); + + runTest(buffer, &aux, match_idx, test_all_offsets); +} + +TEST_P(MultiaccelTest, TestShufti) { + AccelAux aux = {0}; + CharReach cr(SHUF_CR); + + GenerateBuffer(cr); + + CompileAccelScheme(cr, &aux); + + runTest(buffer, &aux, match_idx, test_all_offsets); +} + +TEST_P(MultiaccelTest, TestTruffle) { + AccelAux aux = {0}; + CharReach cr(TRUF_CR); + + GenerateBuffer(cr); + + CompileAccelScheme(cr, &aux); + + runTest(buffer, &aux, match_idx, test_all_offsets); +} + +static const MultiaccelTestParam multiaccelTests[] = { + // long matcher + + // full, partial, flooded, nomatch + {"11111", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONG}, + {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONG}, + {"1111011111", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONG}, + {"1111011110", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONG}, + + // long-grab matcher + + // full, partial, flooded, nomatch + {"111110", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, + {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, + {"11111111110", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, + {"11110111101", 177, 200, false, 5, 0, 
MultibyteAccelInfo::MAT_LONGGRAB}, + + // shift matcher + + // full, partial, flooded, nomatch + {"11001", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, + {"110", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, + {"1001011001", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, + {"1101001011", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, + + // shift-grab matcher + + // full, partial, flooded, nomatch + {"10111", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, + {"101", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, + {"1110010111", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, + {"1100101100", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, + + // doubleshift matcher + + // full, partial (one and two shifts), flooded, nomatch + {"110111", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, + {"110", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, + {"1101", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, + {"1100100101", 178, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, + {"1101001101", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, + + // doubleshift-grab matcher + + // full, partial (one and two shifts), flooded, nomatch + {"100101", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, + {"100", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, + {"1011", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, + {"11111101101", 177, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, + {"1111110111", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, +}; + +INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests)); From 98eff64edf4957c44320664a552f71a03f73f825 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 14 Dec 2015 10:08:57 +1100 Subject: [PATCH 031/218] ng_prefilter: turn large max bound into inf During prefilter region replacement, turn regions with very large max bounds into repeats with inf max bound. This improves compile time and the likelihood that we will actually be able to build an implementation for such patterns. --- src/nfagraph/ng_prefilter.cpp | 42 ++++++++++++++++++++++++--------- unit/hyperscan/bad_patterns.txt | 1 - 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index 54aeb28a7..c0caf1b9e 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ b/src/nfagraph/ng_prefilter.cpp @@ -80,6 +80,10 @@ static const size_t BOUNDED_REPEAT_COUNT = 4; /** Scoring penalty for boundary regions. */ static const size_t PENALTY_BOUNDARY = 32; +/** Regions with max bounds greater than this value will have their max bound + * replaced with inf. */ +static const size_t MAX_REPLACE_BOUND = 10000; + namespace { /** Information describing a region. */ @@ -158,7 +162,7 @@ void markBoundaryRegions(const NGHolder &h, } u32 id = region_map.at(v); - map::iterator ri = regions.find(id); + auto ri = regions.find(id); if (ri == regions.end()) { continue; // Not tracking this region as it's too small. } @@ -176,16 +180,14 @@ map findRegionInfo(const NGHolder &h, continue; } u32 id = region_map.at(v); - RegionInfo &ri = regions.insert( - make_pair(id, RegionInfo(id))).first->second; + RegionInfo &ri = regions.emplace(id, RegionInfo(id)).first->second; ri.vertices.push_back(v); ri.reach |= h[v].char_reach; } // There's no point tracking more information about regions that we won't // consider replacing, so we remove them from the region map. 
- for (map::iterator it = regions.begin(); - it != regions.end();) { + for (auto it = regions.begin(); it != regions.end();) { if (it->second.vertices.size() < MIN_REPLACE_VERTICES) { regions.erase(it++); } else { @@ -217,7 +219,10 @@ void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to, if (contains(rverts, u)) { continue; } - if (edge(u, to, g).second) { + + // Check with edge_by_target to cope with predecessors with large + // fan-out. + if (edge_by_target(u, to, g).second) { continue; } @@ -250,17 +255,27 @@ void replaceRegion(NGHolder &g, const RegionInfo &ri, assert(ri.vertices.size() >= MIN_REPLACE_VERTICES); assert(ri.minWidth.is_finite()); + depth minWidth = ri.minWidth; + depth maxWidth = ri.maxWidth; + + if (maxWidth > depth(MAX_REPLACE_BOUND)) { + DEBUG_PRINTF("using inf instead of large bound %s\n", + maxWidth.str().c_str()); + maxWidth = depth::infinity(); + } + size_t replacementSize; - if (ri.minWidth == ri.maxWidth || ri.maxWidth.is_infinite()) { - replacementSize = ri.minWidth; // {N} or {N,} + if (minWidth == maxWidth || maxWidth.is_infinite()) { + replacementSize = minWidth; // {N} or {N,} } else { - replacementSize = ri.maxWidth; // {N,M} case + replacementSize = maxWidth; // {N,M} case } DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(), replacementSize); - deque verts; + vector verts; + verts.reserve(replacementSize); for (size_t i = 0; i < replacementSize; i++) { NFAVertex v = add_vertex(g); g[v].char_reach = ri.reach; @@ -360,7 +375,8 @@ void prefilterReductions(NGHolder &h, const CompileContext &cc) { return; } - DEBUG_PRINTF("graph with %zu vertices\n", num_vertices(h)); + DEBUG_PRINTF("before: graph with %zu vertices, %zu edges\n", + num_vertices(h), num_edges(h)); h.renumberVertices(); h.renumberEdges(); @@ -369,6 +385,10 @@ void prefilterReductions(NGHolder &h, const CompileContext &cc) { h.renumberVertices(); h.renumberEdges(); + + DEBUG_PRINTF("after: graph with %zu vertices, %zu edges\n", + num_vertices(h), num_edges(h)); + } } // namespace ue2 diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index fb2a2357a..9fc3a4133 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -117,7 +117,6 @@ 117:/[\x{ff]/ #Value in \x{...} sequence is non-hex or missing } at index 1. 118:/foo/{min_offset=10,max_offset=9} #In hs_expr_ext, min_offset must be less than or equal to max_offset. 120:/foo/{min_length=10,max_offset=9} #In hs_expr_ext, min_length must be less than or equal to max_offset. -121:/.e(?:(((eEbd..(d[^Be]{1,7}|A)){8,22}aD.){7}|EecA?(?:\b)c|bB[Dd])){29,37}[adb](?:.|A|c|[BEA]|D)..((?:c|[Cba]))?([Ee]|D)B+(.|[dbB]|E|E).[EcCe]ce(?:C|D)dD[EA]Ac.[aE]d/smiHWP #Pattern too large. 122:/ÀÀ/8 #Expression is not valid UTF-8. 123:/hello \6 world/P #Invalid back reference to expression 6. 124:/hello \6 world|dog/P #Invalid back reference to expression 6. From 997c0c9efd583e37d91a6b9e3b86420a23e91417 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 15 Dec 2015 14:34:25 +1100 Subject: [PATCH 032/218] ComponentRepeat: wire R{0,N} as (R{1,N})? Change the way that we wire up the edges in a bounded repeat to avoid large fan-out from predecessors. 
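To sketch the effect (an illustration only, not part of the patch): for
R{0,3} with a predecessor S, optional copies O1..O3 and a target T, the
old wiring fanned S out to the firsts of every optional copy, while the
new wiring gives S a single entry edge plus one bypass:

    before:  S -> O1, S -> O2, S -> O3, S -> T
    after:   S -> O1 -> O2 -> O3 -> T, plus a single bypass S -> T
             (each optional copy keeps its own edge onward to T)

With P predecessors, this replaces roughly P * (N + 1) edges at the
repeat's entry with 2 * P.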
--- src/parser/ComponentRepeat.cpp | 34 ++++++++++++++-------------------- src/parser/ComponentRepeat.h | 28 +++++++++++++++------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/src/parser/ComponentRepeat.cpp b/src/parser/ComponentRepeat.cpp index 8cd883724..5ce703acb 100644 --- a/src/parser/ComponentRepeat.cpp +++ b/src/parser/ComponentRepeat.cpp @@ -209,7 +209,7 @@ void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs, } } - wireRepeats(bs, lastPos); + wireRepeats(bs); DEBUG_PRINTF("leave\n"); } @@ -279,26 +279,24 @@ vector ComponentRepeat::last() const { assert(!m_firsts.empty()); // notePositions should already have run assert(!m_lasts.empty()); - // Optimisation: when we're not maintaining edge priorities, handling - // optional repeats has been taken care of by our FIRSTS. Thus, only - // the last mandatory repeat and (if different) the last optional - // repeat contributes to lasts. - if (m_min) { - const vector &l = m_lasts[m_min - 1]; - lasts.insert(lasts.end(), l.begin(), l.end()); - } + const auto &l = m_min ? m_lasts[m_min - 1] : m_lasts[0]; + lasts.insert(lasts.end(), l.begin(), l.end()); + if (!m_min || m_min != m_lasts.size()) { lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end()); } + + DEBUG_PRINTF("lasts = %s\n", + dumpPositions(lasts.begin(), lasts.end()).c_str()); return lasts; } -void ComponentRepeat::wireRepeats(GlushkovBuildState &bs, - const vector &lastPos) { +void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) { /* note: m_lasts[0] already valid */ u32 copies = m_firsts.size(); const bool isEmpty = sub_comp->empty(); - const vector &optLasts = m_min ? m_lasts[m_min - 1] : lastPos; + const vector &optLasts = + m_min ? m_lasts[m_min - 1] : m_lasts[0]; if (!copies) { goto inf_check; @@ -317,7 +315,7 @@ void ComponentRepeat::wireRepeats(GlushkovBuildState &bs, DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min); for (u32 rep = MAX(m_min, 1); rep < copies; rep++) { vector lasts = m_lasts[rep - 1]; - if (m_min && rep != m_min) { + if (rep != m_min) { lasts.insert(lasts.end(), optLasts.begin(), optLasts.end()); sort(lasts.begin(), lasts.end()); lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end()); @@ -340,8 +338,8 @@ void ComponentRepeat::precalc_firsts() { /* For normal repeat, our optional repeats each have an epsilon at the end * of their firsts lists. */ - for (u32 i = m_min; i < m_firsts.size();i++) { - m_firsts[i].insert(m_firsts[i].end(), GlushkovBuildState::POS_EPSILON); + for (u32 i = m_min; i < m_firsts.size(); i++) { + m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON); } firsts_cache.clear(); @@ -352,11 +350,7 @@ void ComponentRepeat::precalc_firsts() { assert(!m_firsts.empty()); // notePositions should already have run const vector &f = m_firsts.front(); - // If we're running without edge priorities, then we want to generate the - // repeat in such a way that the firsts do all the work. This will minimise - // the number of exceptional states in a LimEx NFA implementation. - - if (!m_min || sub_comp->empty()) { + if (sub_comp->empty()) { // Emptiable: all our repeats contribute to firsts. // Each repeat's firsts is spliced in at the location of the epsilon // (if any) in the previous repeat's firsts. 
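(Aside: the simplified last() earlier in this diff reads well standalone; the
sketch below mirrors it with positions reduced to plain unsigned ints, so it
is an illustration of the logic rather than the real builder code.)

    #include <vector>

    using Position = unsigned;

    // Lasts of an X{m_min,m_max} repeat: the last mandatory copy contributes
    // (copy 0 stands in when m_min == 0), and so does the final optional
    // copy when that is a different copy.
    std::vector<Position>
    repeatLasts(const std::vector<std::vector<Position>> &m_lasts,
                unsigned m_min) {
        std::vector<Position> lasts;
        const auto &l = m_min ? m_lasts[m_min - 1] : m_lasts[0];
        lasts.insert(lasts.end(), l.begin(), l.end());
        if (!m_min || m_min != m_lasts.size()) {
            lasts.insert(lasts.end(), m_lasts.back().begin(),
                         m_lasts.back().end());
        }
        return lasts;
    }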
diff --git a/src/parser/ComponentRepeat.h b/src/parser/ComponentRepeat.h index e8305ffd6..b708e062e 100644 --- a/src/parser/ComponentRepeat.h +++ b/src/parser/ComponentRepeat.h @@ -42,30 +42,33 @@ namespace ue2 { -/** \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}', +/** + * \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}', * etc). * - * Ascii Art Time: + * ASCII Art Time: * * Our standard representation of standard repeats. Other constructions (fan-in * vs fan-out) would also be possible and equivalent for our purposes. * * {n,m} * - * S->M->M->M->O->O->O->T - * | ^ ^ ^ - * | | | | - * \-----------/ + * S->M->M->M->O->O->O->T + * | ^ ^ ^ + * | | | | + * \-----------/ * * {0,m} * - * S->O->O->O->T - * | ^ ^ ^ - * | | | | - * \-----------/ + * /-----------\ + * | | + * | V + * S->O->O->O->T + * | ^ ^ ^ + * | | | | + * \--------/ * */ - class ComponentRepeat : public Component { friend class ConstructLiteralVisitor; friend class DumpVisitor; @@ -120,8 +123,7 @@ class ComponentRepeat : public Component { /** Called by \ref buildFollowSet to connect up the various repeats. */ void precalc_firsts(); void postSubNotePositionHook(); - void wireRepeats(GlushkovBuildState &bs, - const std::vector &lastPos); + void wireRepeats(GlushkovBuildState &bs); std::unique_ptr sub_comp; u32 m_min; From 3d049d6de3899c54380ebfaf5080af65b3a3c35a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Jan 2016 15:23:28 +1100 Subject: [PATCH 033/218] ComponentRepeat: wire X{0,N} and (X?){N} the same --- src/parser/ComponentRepeat.cpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/parser/ComponentRepeat.cpp b/src/parser/ComponentRepeat.cpp index 5ce703acb..670cee37e 100644 --- a/src/parser/ComponentRepeat.cpp +++ b/src/parser/ComponentRepeat.cpp @@ -349,18 +349,7 @@ void ComponentRepeat::precalc_firsts() { assert(!m_firsts.empty()); // notePositions should already have run const vector &f = m_firsts.front(); - - if (sub_comp->empty()) { - // Emptiable: all our repeats contribute to firsts. - // Each repeat's firsts is spliced in at the location of the epsilon - // (if any) in the previous repeat's firsts. - for (const auto &e : m_firsts) { - replaceEpsilons(firsts_cache, e); - } - } else { - // Not emptiable: firsts come from our first repeat only. - firsts_cache.insert(firsts_cache.end(), f.begin(), f.end()); - } + firsts_cache.insert(firsts_cache.end(), f.begin(), f.end()); } static From e92a20e5fac3bb42936439363fd4c47a9c6237fe Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Jan 2016 15:33:05 +1100 Subject: [PATCH 034/218] ComponentRepeat: remove firsts_cache, precalc code Firsts are easy to compute in ComponentRepeat::first() now. 
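A hedged worked example of why firsts become cheap: once notePositions() has
appended an epsilon to each optional copy's firsts, the front list alone
describes the whole repeat (position names below are invented):

    #include <vector>

    using Position = unsigned;

    // With an epsilon appended to every optional copy's firsts:
    //   a{2,4} -> m_firsts = {{a0}, {a1}, {a2,eps}, {a3,eps}}; first() = {a0}
    //   a{0,2} -> m_firsts = {{a0,eps}, {a1,eps}};       first() = {a0,eps}
    // The epsilon in the {0,N} case is what makes the repeat skippable.
    std::vector<Position>
    repeatFirsts(const std::vector<std::vector<Position>> &m_firsts,
                 unsigned m_max) {
        if (!m_max) {
            return {}; // a {0,0} repeat contributes no firsts
        }
        return m_firsts.front();
    }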
--- src/parser/ComponentRepeat.cpp | 44 +++++++++++++--------------------- src/parser/ComponentRepeat.h | 4 ---- 2 files changed, 16 insertions(+), 32 deletions(-) diff --git a/src/parser/ComponentRepeat.cpp b/src/parser/ComponentRepeat.cpp index 670cee37e..ff02703cd 100644 --- a/src/parser/ComponentRepeat.cpp +++ b/src/parser/ComponentRepeat.cpp @@ -87,8 +87,7 @@ ComponentRepeat::ComponentRepeat(const ComponentRepeat &other) type(other.type), sub_comp(unique_ptr(other.sub_comp->clone())), m_min(other.m_min), m_max(other.m_max), m_firsts(other.m_firsts), m_lasts(other.m_lasts), - posFirst(other.posFirst), posLast(other.posLast), - firsts_cache(other.firsts_cache) {} + posFirst(other.posFirst), posLast(other.posLast) {} bool ComponentRepeat::empty() const { return m_min == 0 || sub_comp->empty(); @@ -175,14 +174,24 @@ void ComponentRepeat::notePositions(GlushkovBuildState &bs) { } recordPosBounds(posFirst, bs.getBuilder().numVertices()); - precalc_firsts(); /* ComponentRepeat requires firsts to be calculated ahead - * of time and cached due to expense */ + + // Each optional repeat has an epsilon at the end of its firsts list. + for (u32 i = m_min; i < m_firsts.size(); i++) { + m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON); + } + } vector ComponentRepeat::first() const { - DEBUG_PRINTF("firsts = %s\n", dumpPositions(firsts_cache.begin(), - firsts_cache.end()).c_str()); - return firsts_cache; + if (!m_max) { + return {}; + } + + assert(!m_firsts.empty()); // notePositions should already have run + const vector &firsts = m_firsts.front(); + DEBUG_PRINTF("firsts = %s\n", + dumpPositions(begin(firsts), end(firsts)).c_str()); + return firsts; } void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs, @@ -331,27 +340,6 @@ void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) { } } -void ComponentRepeat::precalc_firsts() { - DEBUG_PRINTF("building firsts for {%u,%u} repeat with %s sub\n", m_min, - m_max, sub_comp->empty() ? "emptiable" : "non-emptiable"); - - /* For normal repeat, our optional repeats each have an epsilon at the end - * of their firsts lists. - */ - for (u32 i = m_min; i < m_firsts.size(); i++) { - m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON); - } - - firsts_cache.clear(); - if (!m_max) { - return; - } - - assert(!m_firsts.empty()); // notePositions should already have run - const vector &f = m_firsts.front(); - firsts_cache.insert(firsts_cache.end(), f.begin(), f.end()); -} - static bool hasPositionFlags(const Component &c) { for (const auto &e : c.first()) { diff --git a/src/parser/ComponentRepeat.h b/src/parser/ComponentRepeat.h index b708e062e..8905bfcf5 100644 --- a/src/parser/ComponentRepeat.h +++ b/src/parser/ComponentRepeat.h @@ -120,8 +120,6 @@ class ComponentRepeat : public Component { enum RepeatType type; protected: - /** Called by \ref buildFollowSet to connect up the various repeats. */ - void precalc_firsts(); void postSubNotePositionHook(); void wireRepeats(GlushkovBuildState &bs); @@ -134,8 +132,6 @@ class ComponentRepeat : public Component { Position posFirst; Position posLast; - std::vector firsts_cache; - ComponentRepeat(const ComponentRepeat &other); }; From e051077a26cdf1af989c01587c1fa93c4ecacd37 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 6 Jan 2016 11:45:31 +1100 Subject: [PATCH 035/218] Remove "dot" entries from leftfix lookarounds Note that we have to be careful to leave the first lookaround entry in place, if it's a dot. This should eventually be done with a program instruction. 
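Hedged illustration of the intended effect, with invented offsets: a
lookaround list {-3: '.', -2: [a-z], -1: '.'} keeps the dot at -3, since the
earliest entry can pin the literal's minimum match offset, but drops the dot
at -1. The sketch reduces a CharReach to a single "matches any byte" flag:

    #include <map>
    #include <vector>

    // Drop "dot" (all-bytes) lookaround entries, except the earliest one,
    // which may establish a minimum bound on the match offset.
    void normaliseDots(std::map<int, bool> &look) { // offset -> reach.all()
        if (look.empty()) {
            return;
        }
        const int earliest = look.begin()->first;
        std::vector<int> dead;
        for (const auto &m : look) {
            if (m.second && m.first != earliest) {
                dead.push_back(m.first);
            }
        }
        for (int off : dead) {
            look.erase(off);
        }
    }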
--- src/rose/rose_build_lookaround.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index 02843feeb..54c01e089 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -582,6 +582,30 @@ bool getTransientPrefixReach(const NGHolder &g, u32 lag, return true; } +static +void normaliseLeftfix(map &look) { + // We can erase entries where the reach is "all characters", except for the + // very first one -- this might be required to establish a minimum bound on + // the literal's match offset. + + // TODO: It would be cleaner to use a literal program instruction to check + // the minimum bound explicitly. + + if (look.empty()) { + return; + } + + const auto earliest = begin(look)->first; + + vector dead; + for (const auto &m : look) { + if (m.second.all() && m.first != earliest) { + dead.push_back(m.first); + } + } + erase_all(&look, dead); +} + bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, vector &lookaround) { lookaround.clear(); @@ -606,6 +630,7 @@ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, } trimLiterals(build, v, look); + normaliseLeftfix(look); if (look.size() > MAX_LOOKAROUND_ENTRIES) { DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size()); From 3d87e382fad40e7294199ac63fa50b61dd54e627 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 8 Jan 2016 09:11:09 +1100 Subject: [PATCH 036/218] Remove CHECK_DEPTH instruction --- src/rose/program_runtime.h | 12 ------------ src/rose/rose_build_bytecode.cpp | 27 ++------------------------- src/rose/rose_dump.cpp | 6 ------ src/rose/rose_program.h | 7 ------- 4 files changed, 2 insertions(+), 50 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 26cfce5f3..cc345e289 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -829,18 +829,6 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_DEPTH) { - DEBUG_PRINTF("current depth %u, check min depth %u\n", - tctxt->depth, ri->min_depth); - if (ri->min_depth > tctxt->depth) { - DEBUG_PRINTF("failed depth check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_ONLY_EOD) { struct core_info *ci = &tctxtToScratch(tctxt)->core_info; if (end != ci->buf_offset + ci->len) { diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 9c6a84b3d..985efef50 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -170,7 +170,6 @@ class RoseInstruction { const void *get() const { switch (code()) { - case ROSE_INSTR_CHECK_DEPTH: return &u.checkDepth; case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; @@ -199,7 +198,6 @@ class RoseInstruction { size_t length() const { switch (code()) { - case ROSE_INSTR_CHECK_DEPTH: return sizeof(u.checkDepth); case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); @@ -226,7 +224,6 @@ class RoseInstruction { } union { - ROSE_STRUCT_CHECK_DEPTH checkDepth; ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; ROSE_STRUCT_CHECK_BOUNDS checkBounds; 
ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; @@ -2633,10 +2630,6 @@ flattenProgram(const vector> &programs) { assert(targets[i] > offsets[i]); // jumps always progress ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; break; - case ROSE_INSTR_CHECK_DEPTH: - assert(targets[i] > offsets[i]); - ri.u.checkDepth.fail_jump = targets[i] - offsets[i]; - break; case ROSE_INSTR_CHECK_ONLY_EOD: assert(targets[i] > offsets[i]); ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; @@ -3365,7 +3358,6 @@ vector makePredProgram(RoseBuildImpl &build, build_context &bc, static pair makeSparseIterProgram(build_context &bc, map>> &predProgramLists, - const vector &verts, const vector &root_program) { vector program; u32 iter_offset = 0; @@ -3389,17 +3381,6 @@ pair makeSparseIterProgram(build_context &bc, vector jump_table; u32 curr_offset = 0; - // Add a pre-check for min depth, if it's useful. - if (!verts.empty()) { - u32 min_depth = calcMinDepth(bc.depths, verts); - if (min_depth > 1) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_DEPTH); - ri.u.checkDepth.min_depth = min_depth; - program.push_back(ri); - curr_offset = ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } - } - program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN)); curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); @@ -3436,9 +3417,6 @@ pair makeSparseIterProgram(build_context &bc, for (size_t i = 0; i < program.size(); i++) { auto &ri = program[i]; switch (ri.code()) { - case ROSE_INSTR_CHECK_DEPTH: - ri.u.checkDepth.fail_jump = end_offset - curr_offset; - break; case ROSE_INSTR_SPARSE_ITER_BEGIN: ri.u.sparseIterBegin.iter_offset = iter_offset; ri.u.sparseIterBegin.jump_table = jump_table_offset; @@ -3518,8 +3496,7 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, } // Put it all together. - return makeSparseIterProgram(bc, predProgramLists, nonroot_verts, - root_program).first; + return makeSparseIterProgram(bc, predProgramLists, root_program).first; } static @@ -3632,7 +3609,7 @@ pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { return {0, 0}; } - return makeSparseIterProgram(bc, predProgramLists, {}, {}); + return makeSparseIterProgram(bc, predProgramLists, {}); } static diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index beadd23e2..484fde5f7 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -203,12 +203,6 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_DEPTH) { - os << " min_depth " << u32{ri->min_depth} << endl; - os << " fail_jump +" << ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_ONLY_EOD) { os << " fail_jump +" << ri->fail_jump << endl; } diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index f7028c722..a23290739 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -42,7 +42,6 @@ /** \brief Role program instruction opcodes. */ enum RoseInstructionCode { ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. - ROSE_INSTR_CHECK_DEPTH, //!< Check minimum graph depth. ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". @@ -72,12 +71,6 @@ struct ROSE_STRUCT_ANCHORED_DELAY { u32 done_jump; //!< Jump forward this many bytes if successful. }; -struct ROSE_STRUCT_CHECK_DEPTH { - u8 code; //!< From enum RoseInstructionCode. 
- u8 min_depth; //!< Minimum depth of this literal in the Rose graph. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - struct ROSE_STRUCT_CHECK_ONLY_EOD { u8 code; //!< From enum RoseInstructionCode. u32 fail_jump; //!< Jump forward this many bytes on failure. From 14f18bd6e865103a594f7e1de7fcd65c232070c0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 8 Jan 2016 09:58:20 +1100 Subject: [PATCH 037/218] Don't use depth for in-flight check --- src/rose/block.c | 2 +- src/rose/match.h | 21 +++++++++++++++++++++ src/rose/stream.c | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/rose/block.c b/src/rose/block.c index cfcb83416..b3833d4b5 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -233,7 +233,7 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, if (ftable) { DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance, t->floatingMinDistance); - if (t->noFloatingRoots && tctxt->depth == 1) { + if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { DEBUG_PRINTF("skip FLOATING: no inflight matches\n"); goto exit; } diff --git a/src/rose/match.h b/src/rose/match.h index 19a07c9a1..59d83a426 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -299,4 +299,25 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, mmbit_sparse_iter_unset(role_state, numStates, it, si_state); } +static rose_inline +int roseHasInFlightMatches(const struct RoseEngine *t, u8 *state, + const struct hs_scratch *scratch) { + if (scratch->al_log_sum) { + DEBUG_PRINTF("anchored literals in log\n"); + return 1; + } + + if (scratch->tctxt.filledDelayedSlots) { + DEBUG_PRINTF("delayed literal\n"); + return 1; + } + + if (mmbit_any(getRoleState(state), t->rolesWithStateCount)) { + DEBUG_PRINTF("role state is set\n"); + return 1; + } + + return 0; +} + #endif diff --git a/src/rose/stream.c b/src/rose/stream.c index ab23346a4..ae119bcf6 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -502,7 +502,7 @@ void roseStreamExec(const struct RoseEngine *t, u8 *state, const struct HWLM *ftable = getFLiteralMatcher(t); if (ftable) { - if (t->noFloatingRoots && tctxt->depth == 1) { + if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { DEBUG_PRINTF("skip FLOATING: no inflight matches\n"); goto flush_delay_and_exit; } From 48c9d7c381d0ff8dc455070f05d39b24f5228709 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 8 Jan 2016 10:10:10 +1100 Subject: [PATCH 038/218] Remove use of depth from Rose entirely --- src/rose/block.c | 1 - src/rose/eod.c | 2 -- src/rose/init.c | 4 +--- src/rose/match.c | 23 ++++++++-------------- src/rose/match.h | 8 -------- src/rose/program_runtime.h | 13 +++---------- src/rose/rose_build_bytecode.cpp | 5 +---- src/rose/rose_build_dump.cpp | 7 ++----- src/rose/rose_build_impl.h | 4 ---- src/rose/rose_build_misc.cpp | 33 -------------------------------- src/rose/rose_dump.cpp | 2 -- src/rose/rose_internal.h | 1 - src/rose/rose_program.h | 2 -- src/rose/stream.c | 2 -- src/scratch.h | 1 - 15 files changed, 15 insertions(+), 93 deletions(-) diff --git a/src/rose/block.c b/src/rose/block.c index b3833d4b5..3d1eb9e32 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -141,7 +141,6 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, struct RoseContext *tctxt = &scratch->tctxt; tctxt->t = t; - tctxt->depth = 1; tctxt->groups = t->initialGroups; tctxt->lit_offset_adjust = 1; // index after last byte tctxt->delayLastEndOffset = 0; diff 
--git a/src/rose/eod.c b/src/rose/eod.c index a6524f961..ef9873882 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -36,10 +36,8 @@ static really_inline void initContext(const struct RoseEngine *t, u8 *state, u64a offset, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback, void *ctx) { - struct RoseRuntimeState *rstate = getRuntimeState(state); struct RoseContext *tctxt = &scratch->tctxt; tctxt->t = t; - tctxt->depth = rstate->stored_depth; tctxt->groups = loadGroups(t, state); /* TODO: diff groups for eod */ tctxt->lit_offset_adjust = scratch->core_info.buf_offset - scratch->core_info.hlen diff --git a/src/rose/init.c b/src/rose/init.c index d2f85f2c5..1cb26821e 100644 --- a/src/rose/init.c +++ b/src/rose/init.c @@ -43,11 +43,9 @@ static really_inline void init_rstate(const struct RoseEngine *t, u8 *state) { - // Set runtime state: initial depth is 1 and we take our initial groups - // from the RoseEngine. + // Set runtime state: we take our initial groups from the RoseEngine. DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups); struct RoseRuntimeState *rstate = getRuntimeState(state); - rstate->stored_depth = 1; storeGroups(t, state, t->initialGroups); rstate->flags = 0; rstate->broken = NOT_BROKEN; diff --git a/src/rose/match.c b/src/rose/match.c index 1c688aabb..89f0674e7 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -205,8 +205,7 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, printf("\n"); #endif - DEBUG_PRINTF("STATE depth=%u, groups=0x%016llx\n", tctx->depth, - tctx->groups); + DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); if (isLiteralDR(id)) { return tctx->groups; @@ -224,7 +223,7 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, pushDelayedMatches(tl, real_end, tctx); - /* we are just repopulating the delay queue, groups and depths should be + /* we are just repopulating the delay queue, groups should be * already set from the original scan. */ return tctx->groups; @@ -425,8 +424,7 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { u64a real_end = ci->buf_offset + end; // index after last byte DEBUG_PRINTF("MATCH id=%u offsets=[???,%llu]\n", id, real_end); - DEBUG_PRINTF("STATE depth=%u, groups=0x%016llx\n", tctxt->depth, - tctxt->groups); + DEBUG_PRINTF("STATE groups=0x%016llx\n", tctxt->groups); if (can_stop_matching(tctxtToScratch(tctxt))) { DEBUG_PRINTF("received a match when we're already dead!\n"); @@ -492,8 +490,7 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { roseSquashGroup(tctxt, tl); } - DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth, - tctxt->groups); + DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); if (real_end > t->floatingMinLiteralMatchOffset) { recordAnchoredLiteralMatch(tctxt, id, real_end); @@ -623,8 +620,7 @@ hwlmcb_rv_t playDelaySlot(struct RoseContext *tctxt, const u8 *delaySlotBase, DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", literal_id, offset); hwlmcb_rv_t rv = roseProcessDelayedMatch(tctxt->t, offset, literal_id, tctxt); - DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth, - tctxt->groups); + DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* delayed literals can't safely set groups. 
* However we may be setting groups that successors already have @@ -656,8 +652,7 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(struct RoseContext *tctxt, u32 curr_loc) { curr_loc); hwlmcb_rv_t rv = roseProcessDelayedAnchoredMatch(tctxt->t, curr_loc, literal_id, tctxt); - DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth, - tctxt->groups); + DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* anchored literals can't safely set groups. * However we may be setting groups that successors already @@ -837,8 +832,7 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { #endif DEBUG_PRINTF("last end %llu\n", tctx->lastEndOffset); - DEBUG_PRINTF("STATE depth=%u, groups=0x%016llx\n", tctx->depth, - tctx->groups); + DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); if (can_stop_matching(tctxtToScratch(tctx))) { DEBUG_PRINTF("received a match when we're already dead!\n"); @@ -864,8 +858,7 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { rv = roseProcessMainMatch(tctx->t, real_end, id, tctx); - DEBUG_PRINTF("DONE depth=%hhu, groups=0x%016llx\n", tctx->depth, - tctx->groups); + DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); if (rv != HWLM_TERMINATE_MATCHING) { return tctx->groups; diff --git a/src/rose/match.h b/src/rose/match.h index 59d83a426..cab172673 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -260,14 +260,6 @@ hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, struct RoseContext *tctxt return HWLM_CONTINUE_MATCHING; } -static really_inline -void update_depth(struct RoseContext *tctxt, u8 depth) { - u8 d = MAX(tctxt->depth, depth + 1); - assert(d >= tctxt->depth); - DEBUG_PRINTF("depth now %hhu was %hhu\n", d, tctxt->depth); - tctxt->depth = d; -} - static rose_inline void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, u64a currEnd, struct RoseContext *tctxt) { diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index cc345e289..6ba86ca65 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -387,14 +387,6 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, } } -static rose_inline -void roseSetRole(const struct RoseEngine *t, u8 *state, - struct RoseContext *tctxt, u32 stateIndex, u8 depth) { - DEBUG_PRINTF("state idx=%u, depth=%u\n", stateIndex, depth); - mmbit_set(getRoleState(state), t->rolesWithStateCount, stateIndex); - update_depth(tctxt, depth); -} - static rose_inline void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, u32 topEvent, u8 cancel, struct RoseContext *tctxt) { @@ -819,7 +811,6 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_CASE(ANCHORED_DELAY) { if (in_anchored && end > t->floatingMinLiteralMatchOffset) { DEBUG_PRINTF("delay until playback\n"); - update_depth(tctxt, ri->depth); tctxt->groups |= ri->groups; *work_done = 1; assert(ri->done_jump); // must progress @@ -971,7 +962,9 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SET_STATE) { - roseSetRole(t, tctxt->state, tctxt, ri->index, ri->depth); + DEBUG_PRINTF("set state index %u\n", ri->index); + mmbit_set(getRoleState(tctxt->state), t->rolesWithStateCount, + ri->index); *work_done = 1; } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 985efef50..5c56f2d58 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2941,7 +2941,7 @@ 
void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, } static -void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, +void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc, RoseVertex v, vector &program) { // Only relevant for roles that can be triggered by the anchored table. if (!build.isAnchored(v)) { @@ -2952,7 +2952,6 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, // floatingMinLiteralMatchOffset. auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY); - ri.u.anchoredDelay.depth = (u8)min(254U, bc.depths.at(v)); ri.u.anchoredDelay.groups = build.g[v].groups; program.push_back(ri); } @@ -3107,7 +3106,6 @@ void makeRoleSetState(const build_context &bc, RoseVertex v, u32 idx = it->second; auto ri = RoseInstruction(ROSE_INSTR_SET_STATE); ri.u.setState.index = idx; - ri.u.setState.depth = (u8)min(254U, bc.depths.at(v)); program.push_back(ri); } @@ -3785,7 +3783,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { aligned_unique_ptr sbtable = buildSmallBlockMatcher(*this, &sbsize); build_context bc; - bc.depths = findDepths(*this); // Build NFAs set no_retrigger_queues; diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 96ff77347..e73d81c3f 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -316,8 +316,6 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { os << "ROSE LITERALS: a total of " << build.literals.right.size() << " literals and " << num_vertices(g) << " roles." << endl << endl; - const auto depths = findDepths(build); - for (const auto &e : build.literals.right) { u32 id = e.first; const ue2_literal &s = e.second.s; @@ -387,9 +385,8 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { for (RoseVertex v : verts) { // role info - os << " Index " << g[v].idx << ": depth=" << depths.at(v) - << ", groups=0x" << hex << setw(16) << setfill('0') - << g[v].groups << dec; + os << " Index " << g[v].idx << ": groups=0x" << hex << setw(16) + << setfill('0') << g[v].groups << dec; if (g[v].reports.empty()) { os << ", report=NONE"; diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index b2604ff04..a7f2e2f70 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -562,10 +562,6 @@ void normaliseLiteralMask(const ue2_literal &s, std::vector &msk, void fillHamsterLiteralList(const RoseBuildImpl &tbi, rose_literal_table table, std::vector *hl); -// Find the minimum depth in hops of each role. Note that a role may be -// accessible from both the root and the anchored root. -std::map findDepths(const RoseBuildImpl &build); - #ifndef NDEBUG bool canImplementGraphs(const RoseBuildImpl &tbi); #endif diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 9ec26d4c8..044a4208f 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1108,39 +1108,6 @@ LeftEngInfo::operator bool() const { return graph || castle || dfa || haig; } -// Find the minimum depth in hops of each role. Note that a role may be -// accessible from both the root and the anchored root. -map findDepths(const RoseBuildImpl &build) { - const RoseGraph &g = build.g; - map depths; - - depths[build.root] = 0; - depths[build.anchored_root] = 0; - - // BFS from root first. - breadth_first_search(g, build.root, visitor(make_bfs_visitor( - record_distances(boost::make_assoc_property_map(depths), - boost::on_tree_edge()))). 
- vertex_index_map(get(&RoseVertexProps::idx, g))); - - // BFS from anchored root, updating depths in the graph when they get - // smaller. - map depthsAnch; - breadth_first_search(g, build.anchored_root, visitor(make_bfs_visitor( - record_distances(boost::make_assoc_property_map(depthsAnch), - boost::on_tree_edge()))). - vertex_index_map(get(&RoseVertexProps::idx, g))); - for (const auto &e : depthsAnch) { - if (contains(depths, e.first)) { - LIMIT_TO_AT_MOST(&depths[e.first], e.second); - } else { - depths.insert(e); - } - } - - return depths; -} - u32 roseQuality(const RoseEngine *t) { /* Rose is low quality if the atable is a Mcclellan 16 or has multiple DFAs */ diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 484fde5f7..6210d1022 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -196,7 +196,6 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { assert(code <= ROSE_INSTR_END); switch (code) { PROGRAM_CASE(ANCHORED_DELAY) { - os << " depth " << u32{ri->depth} << endl; os << " groups 0x" << std::hex << ri->groups << std::dec << endl; os << " done_jump +" << ri->done_jump << endl; @@ -292,7 +291,6 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SET_STATE) { - os << " depth " << u32{ri->depth} << endl; os << " index " << ri->index << endl; } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 1f927a2cb..92a67ae14 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -512,7 +512,6 @@ struct lit_benefits { #endif // Rose runtime state struct RoseRuntimeState { - u8 stored_depth; /* depth at stream boundary */ u8 flags; /* high bit true if delay rebuild needed */ u8 broken; /* user has requested that we stop matching */ #if defined(_WIN32) diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index a23290739..3f59ba15a 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -66,7 +66,6 @@ enum RoseInstructionCode { struct ROSE_STRUCT_ANCHORED_DELAY { u8 code; //!< From enum RoseInstructionCode. - u8 depth; //!< Depth for this state. rose_group groups; //!< Bitmask. u32 done_jump; //!< Jump forward this many bytes if successful. }; @@ -160,7 +159,6 @@ struct ROSE_STRUCT_REPORT_SOM_KNOWN { struct ROSE_STRUCT_SET_STATE { u8 code; //!< From enum RoseInstructionCode. - u8 depth; //!< Depth for this state. u32 index; //!< State index in multibit. 
}; diff --git a/src/rose/stream.c b/src/rose/stream.c index ae119bcf6..0bbab851d 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -409,7 +409,6 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, u8 *state, tctxt->lastEndOffset = offset + length; storeGroups(t, state, tctxt->groups); struct RoseRuntimeState *rstate = getRuntimeState(state); - rstate->stored_depth = tctxt->depth; rstate->flags = delay_rb_status; } @@ -454,7 +453,6 @@ void roseStreamExec(const struct RoseEngine *t, u8 *state, struct RoseContext *tctxt = &scratch->tctxt; tctxt->t = t; - tctxt->depth = rstate->stored_depth; tctxt->mpv_inactive = 0; tctxt->groups = loadGroups(t, state); tctxt->lit_offset_adjust = offset + 1; // index after last byte diff --git a/src/scratch.h b/src/scratch.h index a1efe6d94..1faf60f70 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -106,7 +106,6 @@ struct core_info { struct RoseContext { const struct RoseEngine *t; u8 *state; /**< base pointer to the full state */ - u8 depth; u8 mpv_inactive; u64a groups; u64a lit_offset_adjust; /**< offset to add to matches coming from hwlm */ From fafcc83520f086debfb5ebcb6f14895f3653760e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 11 Jan 2016 08:58:08 +1100 Subject: [PATCH 039/218] Delete unused build_context::depths --- src/rose/rose_build_bytecode.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 5c56f2d58..b0c251f4d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -249,9 +249,6 @@ class RoseInstruction { }; struct build_context : boost::noncopyable { - /** \brief minimum depth in number of hops from root/anchored root. */ - map depths; - /** \brief information about engines to the left of a vertex */ map leftfix_info; From fe475cc0698780189b127e680be593dfafdcc059 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 11 Jan 2016 14:28:27 +1100 Subject: [PATCH 040/218] alignof() should operate on a type-id --- src/rose/rose_build_bytecode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b0c251f4d..5f6541919 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -327,14 +327,14 @@ template static u32 add_to_engine_blob(build_context &bc, const T &a) { static_assert(is_pod::value, "should be pod"); - return add_to_engine_blob(bc, &a, sizeof(a), alignof(a)); + return add_to_engine_blob(bc, &a, sizeof(a), alignof(T)); } template static u32 add_to_engine_blob(build_context &bc, const T &a, const size_t len) { static_assert(is_pod::value, "should be pod"); - return add_to_engine_blob(bc, &a, len, alignof(a)); + return add_to_engine_blob(bc, &a, len, alignof(T)); } template From 255d84a83a44202008b03572fae33a60b4b848d0 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 11 Jan 2016 13:14:58 +1100 Subject: [PATCH 041/218] squashing: prevent generation of pairs of squash states --- src/nfagraph/ng_squash.cpp | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/nfagraph/ng_squash.cpp b/src/nfagraph/ng_squash.cpp index 04afda817..dd3693e59 100644 --- a/src/nfagraph/ng_squash.cpp +++ b/src/nfagraph/ng_squash.cpp @@ -319,6 +319,44 @@ void findDerivedSquashers(const NGHolder &g, const vector &vByIndex, } } +/* If there are redundant states in the graph, it may be possible for two sibling + * .* states to try to squash each other -- which should be 
prevented + * + * Note: this situation should only happen if ng_equivalence has not been run. + */ +static +void clearMutualSquashers(const NGHolder &g, const vector &vByIndex, + map &squash) { + for (auto it = squash.begin(); it != squash.end();) { + NFAVertex a = it->first; + u32 a_index = g[a].index; + + NFAStateSet a_squash = ~it->second; /* default is mask of survivors */ + for (NFAStateSet::size_type b_index = a_squash.find_first(); + b_index != a_squash.npos; b_index = a_squash.find_next(b_index)) { + assert(b_index != a_index); + NFAVertex b = vByIndex[b_index]; + if (!contains(squash, b)) { + continue; + } + if (!squash[b].test(a_index)) { + /* b and a squash each other, prevent this */ + DEBUG_PRINTF("removing mutual squash %u %zu\n", + a_index, b_index); + squash[b].set(a_index); + it->second.set(b_index); + } + } + + if (it->second.all()) { + DEBUG_PRINTF("%u is no longer an effictive squash state\n", a_index); + it = squash.erase(it); + } else { + ++it; + } + } +} + map findSquashers(const NGHolder &g, som_type som) { map squash; @@ -460,6 +498,8 @@ map findSquashers(const NGHolder &g, som_type som) { findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som, som_depths, region_map, cache); + clearMutualSquashers(g, vByIndex, squash); + return squash; } From 10cda4cc3364c5fe0383542d508c1773b1139fd9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 18 Dec 2015 15:24:52 +1100 Subject: [PATCH 042/218] Rose: Move all literal operations into program Replace the RoseLiteral structure with more program instructions; now, instead of each literal ID leading to a RoseLiteral, it simply has a program to run (and a delay rebuild program). This commit also makes some other improvements: * CHECK_STATE instruction, for use instead of a sparse iterator over a single element. * Elide some checks (CHECK_LIT_EARLY, ANCHORED_DELAY, etc) when not needed. * Flatten PUSH_DELAYED behaviour to one instruction per delayed literal, rather than the mask/index-list approach used before. * Simple program cache at compile time for deduplication. 
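On the last point, a hedged sketch of how a compile-time program cache can
deduplicate (programs are reduced to flattened instruction bytes here; the
real build keys on the instruction list itself):

    #include <boost/functional/hash.hpp>
    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    using Program = std::vector<char>; // flattened instruction bytes

    struct ProgramHash {
        std::size_t operator()(const Program &p) const {
            return boost::hash_range(p.begin(), p.end());
        }
    };

    // Byte-identical programs share one offset in the engine blob.
    unsigned writeProgram(
        std::unordered_map<Program, unsigned, ProgramHash> &cache,
        unsigned &next_offset, const Program &prog) {
        auto it = cache.find(prog);
        if (it != cache.end()) {
            return it->second; // reuse the program we already wrote
        }
        unsigned offset = next_offset;
        next_offset += (unsigned)prog.size();
        cache.emplace(prog, offset);
        return offset;
    }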
--- src/rose/eod.c | 14 +- src/rose/match.c | 234 ++--------- src/rose/program_runtime.h | 200 +++++++-- src/rose/rose_build_bytecode.cpp | 671 +++++++++++++++++++++---------- src/rose/rose_build_compile.cpp | 18 +- src/rose/rose_build_impl.h | 3 +- src/rose/rose_build_misc.cpp | 3 +- src/rose/rose_build_util.h | 17 +- src/rose/rose_dump.cpp | 126 +++--- src/rose/rose_internal.h | 129 ++---- src/rose/rose_program.h | 50 ++- 11 files changed, 840 insertions(+), 625 deletions(-) diff --git a/src/rose/eod.c b/src/rose/eod.c index ef9873882..b95a952e4 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -114,9 +114,9 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset, DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset); - int work_done = 0; - if (roseRunProgram(t, t->eodIterProgramOffset, offset, &(scratch->tctxt), 0, - &work_done) == HWLM_TERMINATE_MATCHING) { + const size_t match_len = 0; + if (roseRunProgram(t, t->eodIterProgramOffset, offset, match_len, + &(scratch->tctxt), 0) == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } @@ -233,9 +233,9 @@ int roseRunEodProgram(const struct RoseEngine *t, u64a offset, // There should be no pending delayed literals. assert(!scratch->tctxt.filledDelayedSlots); - int work_done = 0; - if (roseRunProgram(t, t->eodProgramOffset, offset, &scratch->tctxt, 0, - &work_done) == HWLM_TERMINATE_MATCHING) { + const size_t match_len = 0; + if (roseRunProgram(t, t->eodProgramOffset, offset, match_len, + &scratch->tctxt, 0) == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } diff --git a/src/rose/match.c b/src/rose/match.c index 89f0674e7..72f2a167c 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,123 +71,6 @@ void printMatch(const struct core_info *ci, u64a start, u64a end) { } #endif -static rose_inline -int roseCheckBenefits(struct RoseContext *tctxt, u64a end, u32 mask_rewind, - const u8 *and_mask, const u8 *exp_mask) { - DEBUG_PRINTF("am offset = %zu, em offset = %zu\n", - and_mask - (const u8 *)tctxt->t, - exp_mask - (const u8 *)tctxt->t); - const u8 *data; - - // If the check works over part of the history and part of the buffer, we - // create a temporary copy of the data in here so it's contiguous. 
- u8 temp[MAX_MASK2_WIDTH]; - - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - s64a buffer_offset = (s64a)end - ci->buf_offset; - DEBUG_PRINTF("rel offset %lld\n", buffer_offset); - if (buffer_offset >= mask_rewind) { - data = ci->buf + buffer_offset - mask_rewind; - DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, - ci->buf, mask_rewind); - } else if (buffer_offset <= 0) { - data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind; - DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, - ci->buf, mask_rewind); - } else { - u32 shortfall = mask_rewind - buffer_offset; - DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall, - mask_rewind, ci->hlen); - data = temp; - memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall); - memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall); - } - -#ifdef DEBUG - DEBUG_PRINTF("DATA: "); - for (u32 i = 0; i < mask_rewind; i++) { - printf("%c", ourisprint(data[i]) ? data[i] : '?'); - } - printf(" (len=%u)\n", mask_rewind); -#endif - - u32 len = mask_rewind; - while (len >= sizeof(u64a)) { - u64a a = unaligned_load_u64a(data); - a &= *(const u64a *)and_mask; - if (a != *(const u64a *)exp_mask) { - DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask); - return 0; - } - data += sizeof(u64a); - and_mask += sizeof(u64a); - exp_mask += sizeof(u64a); - len -= sizeof(u64a); - } - - while (len) { - u8 a = *data; - a &= *and_mask; - if (a != *exp_mask) { - DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx em%02hhx\n", a, - *data, *and_mask, *exp_mask); - return 0; - } - data++; - and_mask++; - exp_mask++; - len--; - } - - return 1; -} - -static -int roseCheckLiteralBenefits(u64a end, size_t mask_rewind, u32 id, - struct RoseContext *tctxt) { - const struct RoseEngine *t = tctxt->t; - const struct lit_benefits *lbi = getLiteralBenefitsTable(t) + id; - return roseCheckBenefits(tctxt, end, mask_rewind, lbi->and_mask.a8, - lbi->expected.e8); -} - -static rose_inline -void pushDelayedMatches(const struct RoseLiteral *tl, u64a offset, - struct RoseContext *tctxt) { - u32 delay_mask = tl->delay_mask; - if (!delay_mask) { - return; - } - - u32 delay_count = tctxt->t->delay_count; - u8 *delaySlotBase = getDelaySlots(tctxtToScratch(tctxt)); - size_t delaySlotSize = tctxt->t->delay_slot_size; - assert(tl->delayIdsOffset != ROSE_OFFSET_INVALID); - const u32 *delayIds = getByOffset(tctxt->t, tl->delayIdsOffset); - assert(ISALIGNED(delayIds)); - - while (delay_mask) { - u32 src_slot_index = findAndClearLSB_32(&delay_mask); - u32 slot_index = (src_slot_index + offset) & DELAY_MASK; - u8 *slot = delaySlotBase + delaySlotSize * slot_index; - - if (offset + src_slot_index <= tctxt->delayLastEndOffset) { - DEBUG_PRINTF("skip too late\n"); - goto next; - } - - DEBUG_PRINTF("pushing tab %u into slot %u\n", *delayIds, slot_index); - if (!(tctxt->filledDelayedSlots & (1U << slot_index))) { - tctxt->filledDelayedSlots |= 1U << slot_index; - mmbit_clear(slot, delay_count); - } - - mmbit_set(slot, delay_count, *delayIds); - next: - delayIds++; - } -} - hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, void *ctx) { struct hs_scratch *scratch = ctx; @@ -211,17 +94,17 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, return tctx->groups; } - if (id < t->nonbenefits_base_id - && !roseCheckLiteralBenefits(real_end, end - start + 1, id, tctx)) { - return tctx->groups; - } - assert(id < t->literalCount); - const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - - DEBUG_PRINTF("literal 
id=%u, groups=0x%016llx\n", id, tl->groups); + const u32 *delayRebuildPrograms = + getByOffset(t, t->litDelayRebuildProgramOffset); + const u32 programOffset = delayRebuildPrograms[id]; - pushDelayedMatches(tl, real_end, tctx); + if (programOffset) { + const size_t match_len = end - start + 1; + UNUSED hwlmcb_rv_t rv = + roseRunProgram(t, programOffset, real_end, match_len, tctx, 0); + assert(rv != HWLM_TERMINATE_MATCHING); + } /* we are just repopulating the delay queue, groups should be * already set from the original scan. */ @@ -465,31 +348,28 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { } assert(id < t->literalCount); - const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - assert(tl->programOffset); - assert(!tl->delay_mask); + const u32 *programs = getByOffset(t, t->litProgramOffset); + const u32 programOffset = programs[id]; + assert(programOffset); + + // Anchored literals are never delayed. + assert(!((const u32 *)getByOffset(t, t->litDelayRebuildProgramOffset))[id]); - DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups); + DEBUG_PRINTF("literal id=%u\n", id); if (real_end <= t->floatingMinLiteralMatchOffset) { roseFlushLastByteHistory(t, state, real_end, tctxt); tctxt->lastEndOffset = real_end; } - int work_done = 0; - if (roseRunProgram(t, tl->programOffset, real_end, tctxt, 1, &work_done) == + const size_t match_len = 0; + if (roseRunProgram(t, programOffset, real_end, match_len, tctxt, 1) == HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(tctxtToScratch(tctxt))); DEBUG_PRINTF("caller requested termination\n"); return MO_HALT_MATCHING; } - // If we've actually handled any roles, we might need to apply this - // literal's squash mask to our groups as well. - if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); if (real_end > t->floatingMinLiteralMatchOffset) { @@ -502,9 +382,10 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { // Rose match-processing workhorse /* assumes not in_anchored */ static really_inline -hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, - struct RoseContext *tctxt, char do_group_check, - char in_delay_play, char in_anch_playback) { +hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, + size_t match_len, u32 id, + struct RoseContext *tctxt, char in_delay_play, + char in_anch_playback) { /* assert(!tctxt->in_anchored); */ u8 *state = tctxt->state; @@ -536,63 +417,30 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, } assert(id < t->literalCount); - const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - DEBUG_PRINTF("lit id=%u, groups=0x%016llx\n", id, tl->groups); - - if (do_group_check && !(tl->groups & tctxt->groups)) { - DEBUG_PRINTF("IGNORE: none of this literal's groups are set.\n"); - return HWLM_CONTINUE_MATCHING; - } - - assert(!in_delay_play || !tl->delay_mask); - if (!in_delay_play) { - pushDelayedMatches(tl, end, tctxt); - } - - if (end < t->floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("too soon\n"); - assert(!in_delay_play); /* should not have been enqueued */ - /* continuing on may result in pushing global time back */ - return HWLM_CONTINUE_MATCHING; - } - - int work_done = 0; - - if (tl->programOffset) { - DEBUG_PRINTF("running program at %u\n", tl->programOffset); - if (roseRunProgram(t, tl->programOffset, end, tctxt, 0, &work_done) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - } - - // If we've actually handled 
any roles, we might need to apply this - // literal's squash mask to our groups as well. - if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - - return HWLM_CONTINUE_MATCHING; + const u32 *programs = getByOffset(t, t->litProgramOffset); + return roseRunProgram(t, programs[id], end, match_len, tctxt, 0); } - static never_inline -hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, u64a end, u32 id, - struct RoseContext *tctxt) { - return roseProcessMatch_i(t, end, id, tctxt, 1, 1, 0); +hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, u64a end, + u32 id, struct RoseContext *tctxt) { + size_t match_len = 0; + return roseProcessMatch_i(t, end, match_len, id, tctxt, 1, 0); } static never_inline -hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t, u64a end, - u32 id, struct RoseContext *tctxt) { - return roseProcessMatch_i(t, end, id, tctxt, 0, 0, 1); +hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t, + u64a end, u32 id, + struct RoseContext *tctxt) { + size_t match_len = 0; + return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 1); } static really_inline -hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end, u32 id, +hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end, + size_t match_len, u32 id, struct RoseContext *tctxt) { - return roseProcessMatch_i(t, end, id, tctxt, 1, 0, 0); + return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 0); } static rose_inline @@ -839,11 +687,6 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { return HWLM_TERMINATE_MATCHING; } - if (id < tctx->t->nonbenefits_base_id - && !roseCheckLiteralBenefits(real_end, end - start + 1, id, tctx)) { - return tctx->groups; - } - hwlmcb_rv_t rv = flushQueuedLiterals(tctx, real_end); /* flushDelayed may have advanced tctx->lastEndOffset */ @@ -856,7 +699,8 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { return HWLM_TERMINATE_MATCHING; } - rv = roseProcessMainMatch(tctx->t, real_end, id, tctx); + size_t match_len = end - start + 1; + rv = roseProcessMainMatch(tctx->t, real_end, match_len, id, tctx); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 6ba86ca65..b4d4aeeed 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,9 +41,108 @@ #include "runtime.h" #include "scratch.h" #include "ue2common.h" +#include "util/compare.h" #include "util/fatbit.h" #include "util/multibit.h" +static rose_inline +int roseCheckBenefits(struct RoseContext *tctxt, u64a end, u32 mask_rewind, + const u8 *and_mask, const u8 *exp_mask) { + DEBUG_PRINTF("am offset = %zu, em offset = %zu\n", + and_mask - (const u8 *)tctxt->t, + exp_mask - (const u8 *)tctxt->t); + const u8 *data; + + // If the check works over part of the history and part of the buffer, we + // create a temporary copy of the data in here so it's contiguous. 
+ u8 temp[MAX_MASK2_WIDTH]; + + struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + s64a buffer_offset = (s64a)end - ci->buf_offset; + DEBUG_PRINTF("rel offset %lld\n", buffer_offset); + if (buffer_offset >= mask_rewind) { + data = ci->buf + buffer_offset - mask_rewind; + DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, + ci->buf, mask_rewind); + } else if (buffer_offset <= 0) { + data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind; + DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, + ci->buf, mask_rewind); + } else { + u32 shortfall = mask_rewind - buffer_offset; + DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall, + mask_rewind, ci->hlen); + data = temp; + memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall); + memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall); + } + +#ifdef DEBUG + DEBUG_PRINTF("DATA: "); + for (u32 i = 0; i < mask_rewind; i++) { + printf("%c", ourisprint(data[i]) ? data[i] : '?'); + } + printf(" (len=%u)\n", mask_rewind); +#endif + + u32 len = mask_rewind; + while (len >= sizeof(u64a)) { + u64a a = unaligned_load_u64a(data); + a &= *(const u64a *)and_mask; + if (a != *(const u64a *)exp_mask) { + DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask); + return 0; + } + data += sizeof(u64a); + and_mask += sizeof(u64a); + exp_mask += sizeof(u64a); + len -= sizeof(u64a); + } + + while (len) { + u8 a = *data; + a &= *and_mask; + if (a != *exp_mask) { + DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx em%02hhx\n", a, + *data, *and_mask, *exp_mask); + return 0; + } + data++; + and_mask++; + exp_mask++; + len--; + } + + return 1; +} + +static rose_inline +void rosePushDelayedMatch(const struct RoseEngine *t, u32 delay, + u32 delay_index, u64a offset, + struct RoseContext *tctxt) { + assert(delay); + + const u32 src_slot_index = delay; + u32 slot_index = (src_slot_index + offset) & DELAY_MASK; + + if (offset + src_slot_index <= tctxt->delayLastEndOffset) { + DEBUG_PRINTF("skip too late\n"); + return; + } + + const u32 delay_count = t->delay_count; + u8 *slot = getDelaySlots(tctxtToScratch(tctxt)) + + (t->delay_slot_size * slot_index); + + DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index); + if (!(tctxt->filledDelayedSlots & (1U << slot_index))) { + tctxt->filledDelayedSlots |= 1U << slot_index; + mmbit_clear(slot, delay_count); + } + + mmbit_set(slot, delay_count, delay_index); +} + static rose_inline char rosePrefixCheckMiracles(const struct RoseEngine *t, const struct LeftNfaInfo *left, @@ -782,10 +881,10 @@ char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { break; \ } -static really_inline +static rose_inline hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, - u64a end, struct RoseContext *tctxt, - char in_anchored, int *work_done) { + u64a end, size_t match_len, + struct RoseContext *tctxt, char in_anchored) { DEBUG_PRINTF("program begins at offset %u\n", programOffset); assert(programOffset); @@ -800,6 +899,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, // and SPARSE_ITER_NEXT instructions. struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + // If this program has an effect, work_done will be set to one (which may + // allow the program to squash groups). 
+ int work_done = 0; + assert(*(const u8 *)pc != ROSE_INSTR_END); for (;;) { @@ -812,7 +915,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, if (in_anchored && end > t->floatingMinLiteralMatchOffset) { DEBUG_PRINTF("delay until playback\n"); tctxt->groups |= ri->groups; - *work_done = 1; + work_done = 1; assert(ri->done_jump); // must progress pc += ri->done_jump; continue; @@ -820,6 +923,35 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_LIT_MASK) { + assert(match_len); + if (!roseCheckBenefits(tctxt, end, match_len, ri->and_mask.a8, + ri->cmp_mask.a8)) { + DEBUG_PRINTF("halt: failed mask check\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LIT_EARLY) { + if (end < t->floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("halt: too soon, min offset=%u\n", + t->floatingMinLiteralMatchOffset); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_GROUPS) { + DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n", + tctxt->groups, ri->groups); + if (!(ri->groups & tctxt->groups)) { + DEBUG_PRINTF("halt: no groups are set\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_ONLY_EOD) { struct core_info *ci = &tctxtToScratch(tctxt)->core_info; if (end != ci->buf_offset + ci->len) { @@ -874,6 +1006,11 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(PUSH_DELAYED) { + rosePushDelayedMatch(t, ri->delay, ri->index, end, tctxt); + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { assert(ri->distance <= end); som = end - ri->distance; @@ -890,7 +1027,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_CASE(TRIGGER_INFIX) { roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel, tctxt); - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -900,7 +1037,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -909,7 +1046,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -919,7 +1056,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -928,7 +1065,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, MO_HALT_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -937,7 +1074,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -947,7 +1084,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -957,7 +1094,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done 
= 1; } PROGRAM_NEXT_INSTRUCTION @@ -965,7 +1102,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, DEBUG_PRINTF("set state index %u\n", ri->index); mmbit_set(getRoleState(tctxt->state), t->rolesWithStateCount, ri->index); - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -976,6 +1113,28 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SQUASH_GROUPS) { + assert(popcount64(ri->groups) == 63); // Squash only one group. + if (work_done) { + tctxt->groups &= ri->groups; + DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_STATE) { + DEBUG_PRINTF("check state %u\n", ri->index); + if (!mmbit_isset(getRoleState(tctxt->state), + t->rolesWithStateCount, ri->index)) { + DEBUG_PRINTF("state not on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_BEGIN) { DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); const struct mmbit_sparse_iter *it = @@ -1045,17 +1204,4 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, #undef PROGRAM_CASE #undef PROGRAM_NEXT_INSTRUCTION -static rose_inline -void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) { - assert(tl->squashesGroup); - - // we should be squashing a single group - assert(popcount64(tl->groups) == 1); - - DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n", - ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups); - - tctxt->groups &= ~tl->groups; -} - #endif // PROGRAM_RUNTIME_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 5f6541919..9444005da 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -170,12 +170,16 @@ class RoseInstruction { const void *get() const { switch (code()) { + case ROSE_INSTR_CHECK_LIT_MASK: return &u.checkLitMask; + case ROSE_INSTR_CHECK_LIT_EARLY: return &u.checkLitEarly; + case ROSE_INSTR_CHECK_GROUPS: return &u.checkGroups; case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; + case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; @@ -188,6 +192,8 @@ class RoseInstruction { case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; case ROSE_INSTR_SET_STATE: return &u.setState; case ROSE_INSTR_SET_GROUPS: return &u.setGroups; + case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups; + case ROSE_INSTR_CHECK_STATE: return &u.checkState; case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin; case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; case ROSE_INSTR_END: return &u.end; @@ -198,12 +204,16 @@ class RoseInstruction { size_t length() const { switch (code()) { + case 
ROSE_INSTR_CHECK_LIT_MASK: return sizeof(u.checkLitMask); + case ROSE_INSTR_CHECK_LIT_EARLY: return sizeof(u.checkLitEarly); + case ROSE_INSTR_CHECK_GROUPS: return sizeof(u.checkGroups); case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); + case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); @@ -216,6 +226,8 @@ class RoseInstruction { case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); case ROSE_INSTR_SET_STATE: return sizeof(u.setState); case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); + case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups); + case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState); case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin); case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); case ROSE_INSTR_END: return sizeof(u.end); @@ -224,12 +236,16 @@ class RoseInstruction { } union { + ROSE_STRUCT_CHECK_LIT_MASK checkLitMask; + ROSE_STRUCT_CHECK_LIT_EARLY checkLitEarly; + ROSE_STRUCT_CHECK_GROUPS checkGroups; ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; ROSE_STRUCT_CHECK_BOUNDS checkBounds; ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix; ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; + ROSE_STRUCT_PUSH_DELAYED pushDelayed; ROSE_STRUCT_SOM_ADJUST somAdjust; ROSE_STRUCT_SOM_LEFTFIX somLeftfix; ROSE_STRUCT_TRIGGER_INFIX triggerInfix; @@ -242,12 +258,25 @@ class RoseInstruction { ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; ROSE_STRUCT_SET_STATE setState; ROSE_STRUCT_SET_GROUPS setGroups; + ROSE_STRUCT_SQUASH_GROUPS squashGroups; + ROSE_STRUCT_CHECK_STATE checkState; ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin; ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; ROSE_STRUCT_END end; } u; }; +static +size_t hash_value(const RoseInstruction &ri) { + size_t val = 0; + const char *bytes = (const char *)ri.get(); + const size_t len = ri.length(); + for (size_t i = 0; i < len; i++) { + boost::hash_combine(val, bytes[i]); + } + return val; +} + struct build_context : boost::noncopyable { /** \brief information about engines to the left of a vertex */ map leftfix_info; @@ -270,6 +299,10 @@ struct build_context : boost::noncopyable { * up iterators in early misc. */ map, u32> iterCache; + /** \brief Simple cache of programs written to engine blob, used for + * deduplication. */ + ue2::unordered_map, u32> program_cache; + /** \brief LookEntry list cache, so that we don't have to go scanning * through the full list to find cases we've used already. */ ue2::unordered_map, size_t> lookaround_cache; @@ -284,6 +317,9 @@ struct build_context : boost::noncopyable { * that have already been pushed into the engine_blob. */ ue2::unordered_map engineOffsets; + /** \brief Minimum offset of a match from the floating table. */ + u32 floatingMinLiteralMatchOffset = 0; + /** \brief Contents of the Rose bytecode immediately following the * RoseEngine. 
*/ vector> engine_blob; @@ -1453,31 +1489,6 @@ void updateNfaState(const build_context &bc, RoseStateOffsets *so, } } -static -void buildLitBenefits(const RoseBuildImpl &tbi, RoseEngine *engine, - u32 base_lits_benefits_offset) { - lit_benefits *lba = (lit_benefits *)((char *)engine - + base_lits_benefits_offset); - DEBUG_PRINTF("base offset %u\n", base_lits_benefits_offset); - for (u32 i = 0; i < tbi.nonbenefits_base_id; i++) { - assert(contains(tbi.final_id_to_literal, i)); - assert(tbi.final_id_to_literal.at(i).size() == 1); - u32 lit_id = *tbi.final_id_to_literal.at(i).begin(); - const ue2_literal &s = tbi.literals.right.at(lit_id).s; - DEBUG_PRINTF("building mask for lit %u (fid %u) %s\n", lit_id, i, - dumpString(s).c_str()); - assert(s.length() <= MAX_MASK2_WIDTH); - u32 j = 0; - for (const auto &e : s) { - lba[i].and_mask.a8[j] = e.nocase ? 0 : CASE_BIT; - lba[i].expected.e8[j] = e.nocase ? 0 : (CASE_BIT & e.c); - DEBUG_PRINTF("a%02hhx e%02hhx\n", lba[i].and_mask.a8[j], - lba[i].expected.e8[j]); - j++; - } - } -} - /* does not include history requirements for outfixes or literal matchers */ u32 RoseBuildImpl::calcHistoryRequired() const { u32 m = cc.grey.minHistoryAvailable; @@ -2232,11 +2243,11 @@ void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &gre } static -u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) { - const RoseGraph &g = tbi.g; +u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; u32 minWidth = ROSE_BOUND_INF; for (auto v : vertices_range(g)) { - if (tbi.isAnchored(v) || tbi.isVirtualVertex(v)) { + if (build.isAnchored(v) || build.isVirtualVertex(v)) { DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].idx); continue; } @@ -2656,12 +2667,21 @@ flattenProgram(const vector> &programs) { } static -u32 writeProgram(build_context &bc, vector &program) { +u32 writeProgram(build_context &bc, const vector &program) { if (program.empty()) { DEBUG_PRINTF("no program\n"); return 0; } + assert(program.back().code() == ROSE_INSTR_END); + assert(program.size() >= 1); + + auto it = bc.program_cache.find(program); + if (it != end(bc.program_cache)) { + DEBUG_PRINTF("reusing cached program at %u\n", it->second); + return it->second; + } + DEBUG_PRINTF("writing %zu instructions\n", program.size()); u32 programOffset = 0; for (const auto &ri : program) { @@ -2674,6 +2694,7 @@ u32 writeProgram(build_context &bc, vector &program) { } } DEBUG_PRINTF("program begins at offset %u\n", programOffset); + bc.program_cache.emplace(program, programOffset); return programOffset; } @@ -2764,72 +2785,6 @@ bool hasBoundaryReports(const BoundaryReports &boundary) { return false; } -static -void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, - vector &literalTable) { - const u32 final_id = verify_u32(literalTable.size()); - assert(contains(tbi.final_id_to_literal, final_id)); - const UNUSED u32 literalId = *tbi.final_id_to_literal.at(final_id).begin(); - /* all literal ids associated with this final id should result in identical - * literal entry */ - const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id); - const rose_literal_info &arb_lit_info = **lit_infos.begin(); - - literalTable.push_back(RoseLiteral()); - RoseLiteral &tl = literalTable.back(); - memset(&tl, 0, sizeof(tl)); - - tl.groups = 0; - for (const auto &li : lit_infos) { - tl.groups |= li->group_mask; - } - - assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED - || tbi.literals.right.at(literalId).table == 
ROSE_EVENT); - - // If this literal squashes its group behind it, store that data too - tl.squashesGroup = arb_lit_info.squash_group; - - // Setup the delay stuff - const auto &children = arb_lit_info.delayed_ids; - if (children.empty()) { - tl.delay_mask = 0; - tl.delayIdsOffset = ROSE_OFFSET_INVALID; - } else { - map local_delay_map; // delay -> relative child id - for (const auto &int_id : children) { - const rose_literal_id &child_literal = tbi.literals.right.at(int_id); - u32 child_id = tbi.literal_info[int_id].final_id; - u32 delay_index = child_id - tbi.delay_base_id; - tl.delay_mask |= 1U << child_literal.delay; - local_delay_map[child_literal.delay] = delay_index; - } - - vector delayIds; - for (const auto &did : local_delay_map | map_values) { - delayIds.push_back(did); - } - - tl.delayIdsOffset = add_to_engine_blob(bc, delayIds.begin(), - delayIds.end()); - - } - - assert(!tbi.literals.right.at(literalId).delay || !tl.delay_mask); -} - -// Construct the literal table. -static -void buildLiteralTable(const RoseBuildImpl &tbi, build_context &bc, - vector &literalTable) { - size_t numLiterals = tbi.final_id_to_literal.size(); - literalTable.reserve(numLiterals); - - for (size_t i = 0; i < numLiterals; ++i) { - createLiteralEntry(tbi, bc, literalTable); - } -} - /** * \brief True if the given vertex is a role that can only be switched on at * EOD. @@ -2945,8 +2900,11 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc, return; } - // TODO: also limit to matches that can occur after - // floatingMinLiteralMatchOffset. + // If this match cannot occur after floatingMinLiteralMatchOffset, we do + // not need this check. + if (build.g[v].max_offset <= bc.floatingMinLiteralMatchOffset) { + return; + } auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY); ri.u.anchoredDelay.groups = build.g[v].groups; @@ -3112,6 +3070,13 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, const RoseGraph &g = build.g; const RoseVertex u = source(e, g); + // We know that we can trust the anchored table (DFA) to always deliver us + // literals at the correct offset. + if (build.isAnchored(v)) { + DEBUG_PRINTF("literal in anchored table, skipping bounds check\n"); + return; + } + // Use the minimum literal length. u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); @@ -3347,97 +3312,171 @@ vector makePredProgram(RoseBuildImpl &build, build_context &bc, return program; } -/** - * Returns the pair (program offset, sparse iter offset). - */ static -pair makeSparseIterProgram(build_context &bc, - map>> &predProgramLists, - const vector &root_program) { - vector program; - u32 iter_offset = 0; - - if (!predProgramLists.empty()) { - // First, add the iterator itself. - vector keys; - for (const auto &elem : predProgramLists) { - keys.push_back(elem.first); - } - DEBUG_PRINTF("%zu keys: %s\n", keys.size(), - as_string_list(keys).c_str()); - - vector iter; - mmbBuildSparseIterator(iter, keys, bc.numStates); - assert(!iter.empty()); - iter_offset = addIteratorToTable(bc, iter); - - // Construct our program, starting with the SPARSE_ITER_BEGIN - // instruction, keeping track of the jump offset for each sub-program. 
- vector jump_table; - u32 curr_offset = 0; - - program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN)); - curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); - - for (const auto &e : predProgramLists) { - DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), - curr_offset); - jump_table.push_back(curr_offset); - auto subprog = flattenProgram(e.second); - - if (e.first != keys.back()) { - // For all but the last subprogram, replace the END instruction - // with a SPARSE_ITER_NEXT. - assert(!subprog.empty()); - assert(subprog.back().code() == ROSE_INSTR_END); - subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT); - } +u32 addPredBlocksSingle( + map>> &predProgramLists, + u32 curr_offset, vector &program) { + assert(predProgramLists.size() == 1); - for (const auto &ri : subprog) { - program.push_back(ri); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } + u32 pred_state = predProgramLists.begin()->first; + auto subprog = flattenProgram(predProgramLists.begin()->second); + + // Check our pred state. + auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE); + ri.u.checkState.index = pred_state; + program.push_back(ri); + curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + + // Add subprogram. + for (const auto &ri : subprog) { + program.push_back(ri); + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + + const u32 end_offset = + curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + + // Fix up the instruction operands. + curr_offset = 0; + for (size_t i = 0; i < program.size(); i++) { + auto &ri = program[i]; + switch (ri.code()) { + case ROSE_INSTR_CHECK_STATE: + ri.u.checkState.fail_jump = end_offset - curr_offset; + break; + default: + break; } + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } - const u32 end_offset = curr_offset - ROUNDUP_N(program.back().length(), - ROSE_INSTR_MIN_ALIGN); + return 0; // No iterator. +} - // Write the jump table into the bytecode. - const u32 jump_table_offset = - add_to_engine_blob(bc, begin(jump_table), end(jump_table)); +static +u32 addPredBlocksMulti(build_context &bc, + map>> &predProgramLists, + u32 curr_offset, vector &program) { + assert(!predProgramLists.empty()); - // Fix up the instruction operands. - auto keys_it = begin(keys); - curr_offset = 0; - for (size_t i = 0; i < program.size(); i++) { - auto &ri = program[i]; - switch (ri.code()) { - case ROSE_INSTR_SPARSE_ITER_BEGIN: - ri.u.sparseIterBegin.iter_offset = iter_offset; - ri.u.sparseIterBegin.jump_table = jump_table_offset; - ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset; - break; - case ROSE_INSTR_SPARSE_ITER_NEXT: - ri.u.sparseIterNext.iter_offset = iter_offset; - ri.u.sparseIterNext.jump_table = jump_table_offset; - assert(keys_it != end(keys)); - ri.u.sparseIterNext.state = *keys_it++; - ri.u.sparseIterNext.fail_jump = end_offset - curr_offset; - break; - default: - break; - } + // First, add the iterator itself. + vector keys; + for (const auto &elem : predProgramLists) { + keys.push_back(elem.first); + } + DEBUG_PRINTF("%zu keys: %s\n", keys.size(), as_string_list(keys).c_str()); + + vector iter; + mmbBuildSparseIterator(iter, keys, bc.numStates); + assert(!iter.empty()); + u32 iter_offset = addIteratorToTable(bc, iter); + + // Construct our program, starting with the SPARSE_ITER_BEGIN + // instruction, keeping track of the jump offset for each sub-program. 
+ vector jump_table; + + program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN)); + curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + + for (const auto &e : predProgramLists) { + DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), + curr_offset); + jump_table.push_back(curr_offset); + auto subprog = flattenProgram(e.second); + + if (e.first != keys.back()) { + // For all but the last subprogram, replace the END instruction + // with a SPARSE_ITER_NEXT. + assert(!subprog.empty()); + assert(subprog.back().code() == ROSE_INSTR_END); + subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT); + } + + for (const auto &ri : subprog) { + program.push_back(ri); curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } } + const u32 end_offset = + curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + + // Write the jump table into the bytecode. + const u32 jump_table_offset = + add_to_engine_blob(bc, begin(jump_table), end(jump_table)); + + // Fix up the instruction operands. + auto keys_it = begin(keys); + curr_offset = 0; + for (size_t i = 0; i < program.size(); i++) { + auto &ri = program[i]; + switch (ri.code()) { + case ROSE_INSTR_SPARSE_ITER_BEGIN: + ri.u.sparseIterBegin.iter_offset = iter_offset; + ri.u.sparseIterBegin.jump_table = jump_table_offset; + ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset; + break; + case ROSE_INSTR_SPARSE_ITER_NEXT: + ri.u.sparseIterNext.iter_offset = iter_offset; + ri.u.sparseIterNext.jump_table = jump_table_offset; + assert(keys_it != end(keys)); + ri.u.sparseIterNext.state = *keys_it++; + ri.u.sparseIterNext.fail_jump = end_offset - curr_offset; + break; + default: + break; + } + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + + return iter_offset; +} + +static +u32 addPredBlocks(build_context &bc, + map>> &predProgramLists, + u32 curr_offset, vector &program, + bool force_sparse_iter) { + const size_t num_preds = predProgramLists.size(); + if (num_preds == 0) { + program = flattenProgram({program}); + return 0; // No iterator. + } else if (!force_sparse_iter && num_preds == 1) { + return addPredBlocksSingle(predProgramLists, curr_offset, program); + } else { + return addPredBlocksMulti(bc, predProgramLists, curr_offset, program); + } +} + +/** + * Returns the pair (program offset, sparse iter offset). + */ +static +pair makeSparseIterProgram(build_context &bc, + map>> &predProgramLists, + const vector &root_program, + const vector &pre_program) { + vector program; + u32 curr_offset = 0; + + // Add pre-program first. + for (const auto &ri : pre_program) { + program.push_back(ri); + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + + // Add blocks to deal with non-root edges (triggered by sparse iterator or + // mmbit_isset checks). This operation will flatten the program up to this + // point. + u32 iter_offset = + addPredBlocks(bc, predProgramLists, curr_offset, program, false); + // If we have a root program, replace the END instruction with it. Note // that the root program has already been flattened. 
+ assert(!program.empty()); + assert(program.back().code() == ROSE_INSTR_END); if (!root_program.empty()) { - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } + program.pop_back(); program.insert(end(program), begin(root_program), end(root_program)); } @@ -3445,15 +3484,182 @@ pair makeSparseIterProgram(build_context &bc, } static -u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, +void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id, + vector &program) { + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + const auto &arb_lit_info = **lit_infos.begin(); + if (arb_lit_info.delayed_ids.empty()) { + return; + } + + for (const auto &int_id : arb_lit_info.delayed_ids) { + const auto &child_literal = build.literals.right.at(int_id); + u32 child_id = build.literal_info[int_id].final_id; + u32 delay_index = child_id - build.delay_base_id; + + DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id, + child_literal.delay, child_id); + + auto ri = RoseInstruction(ROSE_INSTR_PUSH_DELAYED); + ri.u.pushDelayed.delay = verify_u8(child_literal.delay); + ri.u.pushDelayed.index = delay_index; + program.push_back(move(ri)); + } +} + +static +void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, + vector &program) { + assert(contains(build.final_id_to_literal, final_id)); + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + + rose_group groups = 0; + for (const auto &li : lit_infos) { + groups |= li->group_mask; + } + + if (!groups) { + return; + } + + auto ri = RoseInstruction(ROSE_INSTR_CHECK_GROUPS); + ri.u.checkGroups.groups = groups; + program.push_back(move(ri)); +} + +static +void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 final_id, + vector &program) { + assert(contains(build.final_id_to_literal, final_id)); + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + assert(!lit_infos.empty()); + + if (!lit_infos.front()->requires_benefits) { + return; + } + + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_MASK); + + assert(build.final_id_to_literal.at(final_id).size() == 1); + u32 lit_id = *build.final_id_to_literal.at(final_id).begin(); + const ue2_literal &s = build.literals.right.at(lit_id).s; + DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id, + final_id, dumpString(s).c_str()); + assert(s.length() <= MAX_MASK2_WIDTH); + u32 i = 0; + for (const auto &e : s) { + ri.u.checkLitMask.and_mask.a8[i] = e.nocase ? 0 : CASE_BIT; + ri.u.checkLitMask.cmp_mask.a8[i] = e.nocase ? 0 : (CASE_BIT & e.c); + i++; + } + + program.push_back(move(ri)); +} + +static +void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id, + vector &program) { + assert(contains(build.final_id_to_literal, final_id)); + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + + if (!lit_infos.front()->squash_group) { + return; + } + + rose_group groups = 0; + for (const auto &li : lit_infos) { + groups |= li->group_mask; + } + + if (!groups) { + return; + } + + DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups); + + auto ri = RoseInstruction(ROSE_INSTR_SQUASH_GROUPS); + ri.u.squashGroups.groups = ~groups; // Negated, so we can just AND it in. 
+ program.push_back(move(ri)); +} + +static +void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, + u32 final_id, + const vector &lit_edges, + vector &program) { + if (lit_edges.empty()) { + return; + } + + if (bc.floatingMinLiteralMatchOffset == 0) { + return; + } + + RoseVertex v = target(lit_edges.front(), build.g); + if (!build.isFloating(v)) { + return; + } + + const auto &lit_ids = build.final_id_to_literal.at(final_id); + if (lit_ids.empty()) { + return; + } + + size_t min_offset = SIZE_MAX; + for (u32 lit_id : lit_ids) { + const auto &lit = build.literals.right.at(lit_id); + min_offset = min(min_offset, lit.elength()); + } + + DEBUG_PRINTF("%zu lits, min_offset=%zu\n", lit_ids.size(), min_offset); + + // If we can't match before the min offset, we don't need the check. + if (min_offset >= bc.floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("no need for check, min is %u\n", + bc.floatingMinLiteralMatchOffset); + return; + } + + program.push_back(RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY)); +} + +static +vector buildLitInitialProgram(RoseBuildImpl &build, + build_context &bc, u32 final_id, + const vector &lit_edges) { + vector pre_program; + + // No initial program for EOD. + if (final_id == MO_INVALID_IDX) { + return pre_program; + } + + DEBUG_PRINTF("final_id %u\n", final_id); + + // Check lit mask. + makeCheckLitMaskInstruction(build, final_id, pre_program); + + // Check literal groups. + makeGroupCheckInstruction(build, final_id, pre_program); + + // Add instructions for pushing delayed matches, if there are any. + makePushDelayedInstructions(build, final_id, pre_program); + + // Add pre-check for early literals in the floating table. + makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, pre_program); + + return pre_program; +} + +static +u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, const vector &lit_edges) { const auto &g = build.g; - DEBUG_PRINTF("%zu lit edges\n", lit_edges.size()); + DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size()); // pred state id -> list of programs map>> predProgramLists; - vector nonroot_verts; // Construct sparse iter sub-programs. for (const auto &e : lit_edges) { @@ -3467,7 +3673,6 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 pred_state = bc.roleStateIndices.at(u); auto program = makePredProgram(build, bc, e); predProgramLists[pred_state].push_back(program); - nonroot_verts.push_back(target(e, g)); } // Construct sub-program for handling root roles. @@ -3485,13 +3690,39 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, root_programs.push_back(role_prog); } + // Literal may squash groups. + if (final_id != MO_INVALID_IDX) { + root_programs.push_back({}); + makeGroupSquashInstruction(build, final_id, root_programs.back()); + } + vector root_program; if (!root_programs.empty()) { root_program = flattenProgram(root_programs); } + auto pre_program = buildLitInitialProgram(build, bc, final_id, lit_edges); + // Put it all together. - return makeSparseIterProgram(bc, predProgramLists, root_program).first; + return makeSparseIterProgram(bc, predProgramLists, root_program, + pre_program).first; +} + +static +u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, + u32 final_id) { + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + const auto &arb_lit_info = **lit_infos.begin(); + if (arb_lit_info.delayed_ids.empty()) { + return 0; // No delayed IDs, no work to do. 
+ } + + vector program; + makeCheckLitMaskInstruction(build, final_id, program); + makePushDelayedInstructions(build, final_id, program); + assert(!program.empty()); + program = flattenProgram({program}); + return writeProgram(bc, program); } static @@ -3530,17 +3761,35 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { return lit_edge_map; } -/** \brief Build the interpreter program for each literal. */ +/** + * \brief Build the interpreter programs for each literal. + * + * Returns the base of the literal program list and the base of the delay + * rebuild program list. + */ static -void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, - vector &literalTable) { +pair buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { + const u32 num_literals = build.final_id_to_literal.size(); auto lit_edge_map = findEdgesByLiteral(build); - for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) { + vector litPrograms(num_literals); + vector delayRebuildPrograms(num_literals); + + for (u32 finalId = 0; finalId != num_literals; ++finalId) { const auto &lit_edges = lit_edge_map[finalId]; - u32 offset = buildLiteralProgram(build, bc, lit_edges); - literalTable[finalId].programOffset = offset; + + litPrograms[finalId] = + buildLiteralProgram(build, bc, finalId, lit_edges); + delayRebuildPrograms[finalId] = + buildDelayRebuildProgram(build, bc, finalId); } + + u32 litProgramsOffset = + add_to_engine_blob(bc, begin(litPrograms), end(litPrograms)); + u32 delayRebuildProgramsOffset = add_to_engine_blob( + bc, begin(delayRebuildPrograms), end(delayRebuildPrograms)); + + return {litProgramsOffset, delayRebuildProgramsOffset}; } static @@ -3604,7 +3853,14 @@ pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { return {0, 0}; } - return makeSparseIterProgram(bc, predProgramLists, {}); + vector program; + + // Note: we force the use of a sparse iterator for the EOD program so we + // can easily guard EOD execution at runtime. 
+ u32 iter_offset = addPredBlocks(bc, predProgramLists, 0, program, true); + + assert(program.size() > 1); + return {writeProgram(bc, program), iter_offset}; } static @@ -3634,7 +3890,7 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) { tie(g[source(b, g)].idx, g[target(b, g)].idx); }); - return buildLiteralProgram(build, bc, edge_list); + return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list); } static @@ -3780,6 +4036,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { aligned_unique_ptr sbtable = buildSmallBlockMatcher(*this, &sbsize); build_context bc; + bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this); // Build NFAs set no_retrigger_queues; @@ -3805,10 +4062,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - u32 lit_benefits_size = - verify_u32(sizeof(lit_benefits) * nonbenefits_base_id); - assert(ISALIGNED_16(lit_benefits_size)); - vector suffixEkeyLists; buildSuffixEkeyLists(*this, bc, qif, &suffixEkeyLists); @@ -3820,9 +4073,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); - vector literalTable; - buildLiteralTable(*this, bc, literalTable); - buildLiteralPrograms(*this, bc, literalTable); + u32 litProgramOffset; + u32 litDelayRebuildProgramOffset; + tie(litProgramOffset, litDelayRebuildProgramOffset) = + buildLiteralPrograms(*this, bc); u32 eodProgramOffset = writeEodProgram(*this, bc); u32 eodIterProgramOffset; @@ -3857,10 +4111,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset = ROUNDUP_CL(currOffset); DEBUG_PRINTF("currOffset %u\n", currOffset); - /* leave space for the benefits listing */ - u32 base_lits_benefits_offset = currOffset; - currOffset += lit_benefits_size; - if (atable) { currOffset = ROUNDUP_CL(currOffset); amatcherOffset = currOffset; @@ -3891,10 +4141,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 intReportOffset = currOffset; currOffset += sizeof(internal_report) * int_reports.size(); - u32 literalOffset = ROUNDUP_N(currOffset, alignof(RoseLiteral)); - u32 literalLen = sizeof(RoseLiteral) * literalTable.size(); - currOffset = literalOffset + literalLen; - u32 leftOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo)); u32 roseLen = sizeof(LeftNfaInfo) * leftInfoTable.size(); currOffset = leftOffset + roseLen; @@ -4016,8 +4262,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { fillInReportInfo(engine.get(), intReportOffset, rm, int_reports); - engine->literalOffset = literalOffset; - engine->literalCount = verify_u32(literalTable.size()); + engine->literalCount = verify_u32(final_id_to_literal.size()); + engine->litProgramOffset = litProgramOffset; + engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset; engine->runtimeImpl = pickRuntimeImpl(*this, outfixEndQueue); engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); @@ -4053,14 +4300,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->lastByteHistoryIterOffset = lastByteOffset; - u32 delay_count = verify_u32(literalTable.size() - delay_base_id); + u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id); engine->delay_count = delay_count; engine->delay_slot_size = mmbit_size(delay_count); engine->delay_base_id = delay_base_id; engine->anchored_base_id = anchored_base_id; engine->anchored_count = delay_base_id - anchored_base_id; - 
engine->nonbenefits_base_id = nonbenefits_base_id; - engine->literalBenefitsOffsets = base_lits_benefits_offset; engine->rosePrefixCount = rosePrefixCount; @@ -4094,7 +4339,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->minWidth = hasBoundaryReports(boundary) ? 0 : minWidth; engine->minWidthExcludingBoundaries = minWidth; engine->maxSafeAnchoredDROffset = findMinWidth(*this, ROSE_FLOATING); - engine->floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this); + engine->floatingMinLiteralMatchOffset = bc.floatingMinLiteralMatchOffset; engine->maxBiAnchoredWidth = findMaxBAWidth(*this); engine->noFloatingRoots = hasNoFloatingRoots(); @@ -4109,7 +4354,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { fillMatcherDistances(*this, engine.get()); engine->initialGroups = getInitialGroups(); - engine->totalNumLiterals = verify_u32(literalTable.size()); + engine->totalNumLiterals = verify_u32(literal_info.size()); engine->asize = verify_u32(asize); engine->ematcherRegionSize = ematcher_region_size; engine->floatingStreamState = verify_u32(floatingStreamStateRequired); @@ -4138,12 +4383,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { &engine->scratchStateSize, &engine->nfaStateSize, &engine->tStateSize); - /* do after update mask */ - buildLitBenefits(*this, engine.get(), base_lits_benefits_offset); - // Copy in other tables copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob); - copy_bytes(ptr + engine->literalOffset, literalTable); copy_bytes(ptr + engine->leftOffset, leftInfoTable); fillLookaroundTables(ptr + lookaroundTableOffset, diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 2a3fe5406..6202299ba 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -258,7 +258,6 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { set anch; set norm; - set norm_benefits; set delay; /* undelayed ids come first */ @@ -281,12 +280,8 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { continue; } - const rose_literal_info &info = tbi.literal_info[i]; - if (info.requires_benefits) { - assert(!tbi.isDelayed(i)); - norm_benefits.insert(i); - DEBUG_PRINTF("%u has benefits\n", i); - } else if (tbi.isDelayed(i)) { + if (tbi.isDelayed(i)) { + assert(!tbi.literal_info[i].requires_benefits); delay.insert(i); } else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) { anch.insert(i); @@ -295,12 +290,7 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { } } - /* normal lits first (with benefits confirm)*/ - allocateFinalIdToSet(g, norm_benefits, &tbi.literal_info, - &tbi.final_id_to_literal, &next_final_id); - - /* other normal lits (without benefits)*/ - tbi.nonbenefits_base_id = next_final_id; + /* normal lits */ allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal, &next_final_id); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index a7f2e2f70..c6d10063f 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions 
are met: @@ -496,7 +496,6 @@ class RoseBuildImpl : public RoseBuild { u32 anchored_base_id; - u32 nonbenefits_base_id; u32 ematcher_region_size; /**< number of bytes the eod table runs over */ /** \brief Mapping from anchored literal ID to the original literal suffix diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 044a4208f..66b0bdd44 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -78,7 +78,6 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, group_weak_end(0), group_end(0), anchored_base_id(MO_INVALID_IDX), - nonbenefits_base_id(MO_INVALID_IDX), ematcher_region_size(0), floating_direct_report(false), eod_event_literal_id(MO_INVALID_IDX), diff --git a/src/rose/rose_build_util.h b/src/rose/rose_build_util.h index fe2124a0f..536b031a3 100644 --- a/src/rose/rose_build_util.h +++ b/src/rose/rose_build_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,21 +36,6 @@ namespace ue2 { -// Calculate the minimum depth for the given set of vertices, ignoring those -// with depth 1. -template -static -u8 calcMinDepth(const std::map &depths, const Cont &verts) { - u8 d = 255; - for (RoseVertex v : verts) { - u8 vdepth = (u8)std::min((u32)255, depths.at(v)); - if (vdepth > 1) { - d = std::min(d, vdepth); - } - } - return d; -} - // Comparator for vertices using their index property. 
struct VertexIndexComp { VertexIndexComp(const RoseGraph &gg) : g(gg) {} diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 6210d1022..cd70c734c 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -116,31 +116,6 @@ const HWLM *getSmallBlockMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset); } -static -u32 literalsWithDirectReports(const RoseEngine *t) { - return t->totalNumLiterals - t->literalCount; -} - -template -static -size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) { - const RoseLiteral *tl = getLiteralTable(t); - const RoseLiteral *tl_end = tl + t->literalCount; - - return count_if(tl, tl_end, pred); -} - -static -size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) { - rose_group mask = ~((1ULL << from) - 1); - if (to < 64) { - mask &= ((1ULL << to) - 1); - } - - return literalsWithPredicate( - t, [&mask](const RoseLiteral &l) { return l.groups & mask; }); -} - static CharReach bitvectorToReach(const u8 *reach) { CharReach cr; @@ -177,6 +152,16 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, } } +static +string dumpStrMask(const u8 *mask, size_t len) { + ostringstream oss; + for (size_t i = 0; i < len; i++) { + oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]} + << " "; + } + return oss.str(); +} + #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ @@ -202,14 +187,26 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_ONLY_EOD) { - os << " fail_jump +" << ri->fail_jump << endl; + PROGRAM_CASE(CHECK_LIT_MASK) { + os << " and_mask " + << dumpStrMask(ri->and_mask.a8, sizeof(ri->and_mask.a8)) + << endl; + os << " cmp_mask " + << dumpStrMask(ri->cmp_mask.a8, sizeof(ri->cmp_mask.a8)) + << endl; } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_BOUNDS) { - os << " min_bound " << ri->min_bound << endl; - os << " max_bound " << ri->max_bound << endl; + PROGRAM_CASE(CHECK_LIT_EARLY) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ONLY_EOD) { os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -236,6 +233,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(PUSH_DELAYED) { + os << " delay " << u32{ri->delay} << endl; + os << " index " << ri->index << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { os << " distance " << ri->distance << endl; } @@ -301,6 +304,18 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SQUASH_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_STATE) { + os << " index " << ri->index << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_BEGIN) { os << " iter_offset " << ri->iter_offset << endl; os << " jump_table " << ri->jump_table << endl; @@ -334,21 +349,32 @@ static void dumpRoseLitPrograms(const 
RoseEngine *t, const string &filename) { ofstream os(filename); - const RoseLiteral *lits = getLiteralTable(t); - const char *base = (const char *)t; + const u32 *litPrograms = + (const u32 *)loadFromByteCodeOffset(t, t->litProgramOffset); + const u32 *delayRebuildPrograms = + (const u32 *)loadFromByteCodeOffset(t, t->litDelayRebuildProgramOffset); for (u32 i = 0; i < t->literalCount; i++) { - const RoseLiteral *lit = &lits[i]; os << "Literal " << i << endl; os << "---------------" << endl; - if (lit->programOffset) { - os << "Program @ " << lit->programOffset << ":" << endl; - dumpProgram(os, t, base + lit->programOffset); + if (litPrograms[i]) { + os << "Program @ " << litPrograms[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, litPrograms[i]); + dumpProgram(os, t, prog); } else { os << "" << endl; } + if (delayRebuildPrograms[i]) { + os << "Delay Rebuild Program @ " << delayRebuildPrograms[i] << ":" + << endl; + const char *prog = (const char *)loadFromByteCodeOffset( + t, delayRebuildPrograms[i]); + dumpProgram(os, t, prog); + } + os << endl; } @@ -710,8 +736,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance); - fprintf(f, " - literal table : %zu bytes\n", - t->literalCount * sizeof(RoseLiteral)); fprintf(f, " - role state table : %zu bytes\n", t->rolesWithStateCount * sizeof(u32)); fprintf(f, " - nfa info table : %u bytes\n", @@ -745,22 +769,9 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "handled key count : %u\n", t->handledKeyCount); fprintf(f, "\n"); - fprintf(f, "number of literals : %u\n", t->totalNumLiterals); - fprintf(f, " - delayed : %u\n", t->delay_count); - fprintf(f, " - direct report : %u\n", - literalsWithDirectReports(t)); - fprintf(f, " - that squash group : %zu\n", - literalsWithPredicate( - t, [](const RoseLiteral &l) { return l.squashesGroup != 0; })); - fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id); - fprintf(f, " - with program : %zu\n", - literalsWithPredicate( - t, [](const RoseLiteral &l) { return l.programOffset != 0; })); - fprintf(f, " - in groups ::\n"); - fprintf(f, " + weak : %zu\n", - literalsInGroups(t, 0, t->group_weak_end)); - fprintf(f, " + general : %zu\n", - literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8)); + fprintf(f, "total literal count : %u\n", t->totalNumLiterals); + fprintf(f, " prog table size : %u\n", t->literalCount); + fprintf(f, " delayed literals : %u\n", t->delay_count); fprintf(f, "\n"); fprintf(f, " minWidth : %u\n", t->minWidth); @@ -839,7 +850,8 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, fmatcherMaxBiAnchoredWidth); DUMP_U32(t, intReportOffset); DUMP_U32(t, intReportCount); - DUMP_U32(t, literalOffset); + DUMP_U32(t, litProgramOffset); + DUMP_U32(t, litDelayRebuildProgramOffset); DUMP_U32(t, literalCount); DUMP_U32(t, multidirectOffset); DUMP_U32(t, activeArrayCount); @@ -876,7 +888,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, delay_base_id); DUMP_U32(t, anchored_count); DUMP_U32(t, anchored_base_id); - DUMP_U32(t, nonbenefits_base_id); DUMP_U32(t, maxFloatingDelayedMatch); DUMP_U32(t, delayRebuildLength); DUMP_U32(t, stateOffsets.history); @@ -905,7 +916,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, rosePrefixCount); DUMP_U32(t, activeLeftIterOffset); DUMP_U32(t, ematcherRegionSize); - DUMP_U32(t, 
literalBenefitsOffsets);
     DUMP_U32(t, somRevCount);
     DUMP_U32(t, somRevOffsetOffset);
     DUMP_U32(t, group_weak_end);
diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h
index 92a67ae14..c90256008 100644
--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -73,43 +73,11 @@ ReportID literalToReport(u32 id) {
     return id & ~LITERAL_DR_FLAG;
 }

-/** \brief Structure representing a literal. */
-struct RoseLiteral {
-    /**
-     * \brief Program to run when this literal is seen.
-     *
-     * Offset is relative to RoseEngine, or zero for no program.
-     */
-    u32 programOffset;
-
-    /** \brief Bitset of groups that cause this literal to fire. */
-    rose_group groups;
-
-    /**
-     * \brief True if this literal switches off its group behind it when it
-     * sets a role.
-     */
-    u8 squashesGroup;
-
-    /**
-     * \brief Bitset which indicates that the literal inserts a delayed
-     * match at the given offset.
-     */
-    u32 delay_mask;
-
-    /** \brief Offset to array of ids to poke in the delay structure. */
-    u32 delayIdsOffset;
-};
-
 /* Allocation of Rose literal ids
 *
 * The rose literal id space is segmented:
 *
 * ---- 0
- * | | Normal undelayed literals in the e, or f tables which require a
- * | | manual benefits confirm on match [a table never requires benefits]
- * | |
- * ---- nonbenefits_base_id
 * | | 'Normal' undelayed literals in either e or f tables
 * | |
 * | |
@@ -127,7 +95,7 @@ struct RoseLiteral {
 * ---- LITERAL_DR_FLAG
 * | | Direct Report literals: immediately raise an internal report with id
 * | | given by (lit_id & ~LITERAL_DR_FLAG). Raised by a or f tables (or e??).
- * | | No RoseLiteral structure
+ * | | No literal programs.
 * | |
 * | |
 * ----
@@ -135,14 +103,15 @@ struct RoseLiteral {

 /* Rose Literal Sources
 *
- * Rose currently gets events (mainly roseProcessMatch calls) from 8 sources:
+ * Rose currently gets events (mainly roseProcessMatch calls) from a number of
+ * sources:
 * 1) The floating table
 * 2) The anchored table
 * 3) Delayed literals
- * 4) suffixes NFAs
- * 5) masksv2 (literals with benefits)
- * 6) End anchored table
- * 7) prefix / infix nfas
+ * 4) Suffix NFAs
+ * 5) Literal masks
+ * 6) End anchored table
+ * 7) Prefix / Infix NFAs
 *
 * Care is required to ensure that events appear to come into Rose in order
 * (or sufficiently ordered for Rose to cope). Generally the progress of the
@@ -165,7 +134,7 @@ struct RoseLiteral {
 * NFA queues are run to the current point (floating or delayed literal) as
 * appropriate.
 *
- * Maskv2:
+ * Literal Masks:
 * These are triggered from either floating literals or delayed literals and
 * inspect the data behind them. Matches are raised at the same location as the
 * trigger literal so there are no ordering issues.
Masks are always pure @@ -301,12 +270,12 @@ struct RoseStateOffsets { }; struct RoseBoundaryReports { - u32 reportEodOffset; /**< 0 if no reports lits, otherwise offset of + u32 reportEodOffset; /**< 0 if no reports list, otherwise offset of * MO_INVALID_IDX terminated list to report at EOD */ - u32 reportZeroOffset; /**< 0 if no reports lits, otherwise offset of + u32 reportZeroOffset; /**< 0 if no reports list, otherwise offset of * MO_INVALID_IDX terminated list to report at offset * 0 */ - u32 reportZeroEodOffset; /**< 0 if no reports lits, otherwise offset of + u32 reportZeroEodOffset; /**< 0 if no reports list, otherwise offset of * MO_INVALID_IDX terminated list to report if eod * is at offset 0. Superset of other lists. */ }; @@ -338,18 +307,20 @@ struct RoseBoundaryReports { #define ROSE_RUNTIME_PURE_LITERAL 1 #define ROSE_RUNTIME_SINGLE_OUTFIX 2 -// Runtime structure header for Rose. -// In memory, we follow this with: -// 1a. anchored 'literal' matcher table -// 1b. floating literal matcher table -// 1c. eod-anchored literal matcher table -// 1d. small block table -// 2. array of RoseLiteral (literalCount entries) -// 8. array of NFA offsets, one per queue -// 9. array of state offsets, one per queue (+) -// 10. array of role ids for the set of all root roles -// 12. multi-direct report array -/* +/** + * \brief Runtime structure header for Rose. + * + * Runtime structure header for Rose. + * In memory, we follow this with: + * -# the "engine blob" + * -# anchored 'literal' matcher table + * -# floating literal matcher table + * -# eod-anchored literal matcher table + * -# small block table + * -# array of NFA offsets, one per queue + * -# array of state offsets, one per queue (+) + * -# multi-direct report array + * * (+) stateOffset array note: Offsets in the array are either into the stream * state (normal case) or into the tstate region of scratch (for transient rose * nfas). Rose nfa info table can distinguish the cases. @@ -407,8 +378,22 @@ struct RoseEngine { * with the anchored table. */ u32 intReportOffset; /**< offset of array of internal_report structures */ u32 intReportCount; /**< number of internal_report structures */ - u32 literalOffset; // offset of RoseLiteral array (bytes) - u32 literalCount; // number of RoseLiteral entries [NOT number of literals] + + /** \brief Offset of u32 array of program offsets for literals. */ + u32 litProgramOffset; + + /** \brief Offset of u32 array of delay rebuild program offsets for + * literals. */ + u32 litDelayRebuildProgramOffset; + + /** + * \brief Number of entries in the arrays pointed to by litProgramOffset, + * litDelayRebuildProgramOffset. + * + * Note: NOT the total number of literals. + */ + u32 literalCount; + u32 multidirectOffset; /**< offset of multi-direct report list. */ u32 activeArrayCount; //number of nfas tracked in the active array u32 activeLeftCount; //number of nfas tracked in the active rose array @@ -468,8 +453,6 @@ struct RoseEngine { u32 anchored_count; /* number of anchored literal ids */ u32 anchored_base_id; /* literal id of the first literal in the A table. * anchored literal ids are contiguous */ - u32 nonbenefits_base_id; /* first literal id without benefit conf. 
- * contiguous, blah, blah */ u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can * usefully be reported */ u32 delayRebuildLength; /* length of the history region which needs to be @@ -486,8 +469,6 @@ struct RoseEngine { u32 rosePrefixCount; /* number of rose prefixes */ u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */ u32 ematcherRegionSize; /* max region size to pass to ematcher */ - u32 literalBenefitsOffsets; /* offset to array of benefits indexed by lit - id */ u32 somRevCount; /**< number of som reverse nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ u32 group_weak_end; /* end of weak groups, debugging only */ @@ -496,17 +477,6 @@ struct RoseEngine { struct scatter_full_plan state_init; }; -struct lit_benefits { - union { - u64a a64[MAX_MASK2_WIDTH/sizeof(u64a)]; - u8 a8[MAX_MASK2_WIDTH]; - } and_mask; - union { - u64a e64[MAX_MASK2_WIDTH/sizeof(u64a)]; - u8 e8[MAX_MASK2_WIDTH]; - } expected; -}; - #if defined(_WIN32) #pragma pack(push, 1) #endif @@ -574,14 +544,6 @@ const void *getSBLiteralMatcher(const struct RoseEngine *t) { return matcher; } -static really_inline -const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) { - const struct RoseLiteral *tl - = (const struct RoseLiteral *)((const char *)t + t->literalOffset); - assert(ISALIGNED_N(tl, 4)); - return tl; -} - static really_inline const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) { const struct LeftNfaInfo *r @@ -601,13 +563,6 @@ const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) { return it; } -static really_inline -const struct lit_benefits *getLiteralBenefitsTable( - const struct RoseEngine *t) { - return (const struct lit_benefits *) - ((const char *)t + t->literalBenefitsOffsets); -} - static really_inline const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) { const struct NfaInfo *infos diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 3f59ba15a..37017ca0a 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,11 +42,15 @@ /** \brief Role program instruction opcodes. */ enum RoseInstructionCode { ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + ROSE_INSTR_CHECK_LIT_MASK, //!< Check and/cmp mask. + ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset. + ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on. ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. + ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. @@ -59,6 +63,8 @@ enum RoseInstructionCode { ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. ROSE_INSTR_SET_STATE, //!< Switch a state index on. ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. 
+    ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups.
+    ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit.
     ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
     ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
     ROSE_INSTR_END //!< End of program.
@@ -70,6 +76,29 @@ struct ROSE_STRUCT_ANCHORED_DELAY {
     u32 done_jump; //!< Jump forward this many bytes if successful.
 };

+union RoseLiteralMask {
+    u64a a64[MAX_MASK2_WIDTH / sizeof(u64a)];
+    u8 a8[MAX_MASK2_WIDTH];
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_LIT_MASK {
+    u8 code; //!< From enum RoseInstructionCode.
+    union RoseLiteralMask and_mask;
+    union RoseLiteralMask cmp_mask;
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_LIT_EARLY {
+    u8 code; //!< From enum RoseInstructionCode.
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_GROUPS {
+    u8 code; //!< From enum RoseInstructionCode.
+    rose_group groups; //!< Bitmask.
+};
+
 struct ROSE_STRUCT_CHECK_ONLY_EOD {
     u8 code; //!< From enum RoseInstructionCode.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
@@ -103,6 +132,12 @@ struct ROSE_STRUCT_CHECK_LEFTFIX {
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };

+struct ROSE_STRUCT_PUSH_DELAYED {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 delay; // Number of bytes to delay.
+    u32 index; // Delay literal index (relative to first delay lit).
+};
+
 struct ROSE_STRUCT_SOM_ADJUST {
     u8 code; //!< From enum RoseInstructionCode.
     u32 distance; //!< Distance to EOM.
@@ -164,7 +199,18 @@ struct ROSE_STRUCT_SET_STATE {

 struct ROSE_STRUCT_SET_GROUPS {
     u8 code; //!< From enum RoseInstructionCode.
-    rose_group groups; //!< Bitmask.
+    rose_group groups; //!< Bitmask to OR into groups.
+};
+
+struct ROSE_STRUCT_SQUASH_GROUPS {
+    u8 code; //!< From enum RoseInstructionCode.
+    rose_group groups; //!< Bitmask to AND into groups.
+};
+
+struct ROSE_STRUCT_CHECK_STATE {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 index; //!< State index in the role multibit.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };

 /**

From 39886a09687581dd2b89b214f8f10a8405b0f2d0 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Tue, 12 Jan 2016 14:48:35 +1100
Subject: [PATCH 043/218] Coverity: Restore output stream format

---
 src/util/dump_charclass.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/util/dump_charclass.cpp b/src/util/dump_charclass.cpp
index 2cadc67a4..74b45414a 100644
--- a/src/util/dump_charclass.cpp
+++ b/src/util/dump_charclass.cpp
@@ -72,8 +72,10 @@ void describeChar(ostream &os, char c, enum cc_output_t out_type) {
     } else if (c == 0x0d) {
         os << backslash << 'r';
     } else {
+        auto fmt(os.flags());
         os << backslash << 'x' << std::hex << std::setw(2)
-           << std::setfill('0') << (unsigned)(c & 0xff) << std::dec;
+           << std::setfill('0') << (unsigned)(c & 0xff);
+        os.flags(fmt);
     }
 }

From 8783750c729eacbc4618e1f45cf453190fb4268a Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 12 Jan 2016 14:10:23 +1100
Subject: [PATCH 044/218] Remove dupe engine, state ptrs from RoseContext

Remove the RoseEngine and stream state pointers from RoseContext, as they
are also present in core_info. Unify stream state handling in Rose to always
use a char * (we often used a u8 * for no particularly good reason) and
tidy up.
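As an illustrative sketch only (not part of the patch): the change applies one
substitution pattern across the runtime. Call sites stop reading the
duplicated RoseContext fields and instead derive both pointers from
core_info; tctxtToScratch() is the existing helper, used the same way in the
catchup.c hunks below.

    /* Before: RoseContext carried its own copies of these pointers. */
    const struct RoseEngine *t = tctxt->t;
    u8 *state = tctxt->state;

    /* After: recover scratch from the context, then read the single
     * authoritative copies held in core_info; stream state is handled
     * as a char * throughout. */
    struct hs_scratch *scratch = tctxtToScratch(tctxt);
    const struct RoseEngine *t = scratch->core_info.rose;
    char *state = scratch->core_info.state;

The hunks below are largely mechanical applications of this substitution.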
--- src/rose/block.c | 12 ++-- src/rose/catchup.c | 139 +++++++++++++++++++------------------ src/rose/catchup.h | 16 ++--- src/rose/eod.c | 24 +++---- src/rose/init.c | 8 +-- src/rose/init.h | 4 +- src/rose/match.c | 95 ++++++++++++++----------- src/rose/match.h | 18 ++--- src/rose/program_runtime.h | 117 ++++++++++++++++--------------- src/rose/rose.h | 12 ++-- src/rose/runtime.h | 31 +++++---- src/rose/stream.c | 28 ++++---- src/runtime.c | 25 ++++--- src/scratch.h | 4 +- 14 files changed, 272 insertions(+), 261 deletions(-) diff --git a/src/rose/block.c b/src/rose/block.c index 3d1eb9e32..a3174b637 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -78,7 +78,7 @@ void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable, } static really_inline -void init_state_for_block(const struct RoseEngine *t, u8 *state) { +void init_state_for_block(const struct RoseEngine *t, char *state) { assert(t); assert(state); @@ -93,7 +93,7 @@ void init_state_for_block(const struct RoseEngine *t, u8 *state) { static really_inline void init_outfixes_for_block(const struct RoseEngine *t, - struct hs_scratch *scratch, u8 *state, + struct hs_scratch *scratch, char *state, char is_small_block) { /* active leaf array has been cleared by the init scatter */ @@ -135,18 +135,16 @@ void init_outfixes_for_block(const struct RoseEngine *t, static really_inline void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback, - void *ctxt, u8 *state, char is_small_block) { + void *ctxt, char *state, char is_small_block) { init_state_for_block(t, state); struct RoseContext *tctxt = &scratch->tctxt; - tctxt->t = t; tctxt->groups = t->initialGroups; tctxt->lit_offset_adjust = 1; // index after last byte tctxt->delayLastEndOffset = 0; tctxt->lastEndOffset = 0; tctxt->filledDelayedSlots = 0; - tctxt->state = state; tctxt->cb = callback; tctxt->cb_som = som_callback; tctxt->userCtx = ctxt; @@ -185,7 +183,7 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, const char is_small_block = (length < ROSE_SMALL_BLOCK_LEN && t->sbmatcherOffset); - u8 *state = (u8 *)scratch->core_info.state; + char *state = scratch->core_info.state; init_for_block(t, scratch, callback, som_callback, ctx, state, is_small_block); diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 77b12b49f..d1ef41ff1 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,7 +43,8 @@ typedef struct queue_match PQ_T; static really_inline int handleReportInternally(struct hs_scratch *scratch, ReportID id, u64a offset) { - const struct internal_report *ri = getInternalReport(scratch->tctxt.t, id); + const struct RoseEngine *t = scratch->core_info.rose; + const struct internal_report *ri = getInternalReport(t, id); if (ri->type == EXTERNAL_CALLBACK) { return 0; } @@ -52,8 +53,7 @@ int handleReportInternally(struct hs_scratch *scratch, ReportID id, return 1; } if (ri->type == INTERNAL_ROSE_CHAIN) { - roseHandleChainMatch(scratch->tctxt.t, id, 
offset, &scratch->tctxt, 0, - 1); + roseHandleChainMatch(t, id, offset, &scratch->tctxt, 0, 1); return 1; } @@ -63,7 +63,8 @@ int handleReportInternally(struct hs_scratch *scratch, ReportID id, static really_inline int handleReportInternallyNoChain(struct hs_scratch *scratch, ReportID id, u64a offset) { - const struct internal_report *ri = getInternalReport(scratch->tctxt.t, id); + const struct RoseEngine *t = scratch->core_info.rose; + const struct internal_report *ri = getInternalReport(t, id); if (ri->type == EXTERNAL_CALLBACK) { return 0; } @@ -141,8 +142,9 @@ void nextAnchoredMatch(const struct RoseEngine *t, struct RoseContext *tctxt, static really_inline void deactivateQueue(u8 *aa, u32 qi, struct hs_scratch *scratch) { - u32 aaCount = scratch->tctxt.t->activeArrayCount; - u32 qCount = scratch->tctxt.t->queueCount; + const struct RoseEngine *t = scratch->core_info.rose; + u32 aaCount = t->activeArrayCount; + u32 qCount = t->queueCount; /* this is sailing close to the wind with regards to invalidating an * iteration. We are saved by the fact that unsetting does not clear the @@ -343,8 +345,8 @@ int roseNfaFinalBlastAdaptor(u64a offset, ReportID id, void *context) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(tctxt->t, 0, - scratch->core_info.exhaustionVector); + return !roseSuffixIsExhausted(scratch->core_info.rose, 0, + scratch->core_info.exhaustionVector); } } @@ -369,8 +371,8 @@ int roseNfaFinalBlastAdaptorNoInternal(u64a offset, ReportID id, return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(tctxt->t, 0, - scratch->core_info.exhaustionVector); + return !roseSuffixIsExhausted(scratch->core_info.rose, 0, + scratch->core_info.exhaustionVector); } } @@ -427,7 +429,7 @@ s64a findSecondPlace(struct catchup_pq *pq, s64a loc_limit) { } } -hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, u8 *state, s64a loc, +hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, char *state, s64a loc, struct hs_scratch *scratch) { struct mq *queues = scratch->queues; u8 *aa = getActiveLeafArray(t, state); @@ -518,13 +520,14 @@ static UNUSED int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { struct RoseContext *tctxt = context; struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct RoseEngine *t = scratch->core_info.rose; - const struct internal_report *ri = getInternalReport(scratch->tctxt.t, id); + const struct internal_report *ri = getInternalReport(t, id); DEBUG_PRINTF("called\n"); if (ri->type != INTERNAL_ROSE_CHAIN) { /* INTERNAL_ROSE_CHAIN are not visible externally */ - if (roseCatchUpMPV(tctxt->t, tctxt->state, + if (roseCatchUpMPV(t, scratch->core_info.state, offset - scratch->core_info.buf_offset, scratch) == HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("done\n"); @@ -548,8 +551,8 @@ int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(tctxt->t, tctxt->curr_qi, - scratch->core_info.exhaustionVector); + return !roseSuffixIsExhausted(t, tctxt->curr_qi, + scratch->core_info.exhaustionVector); } } @@ -557,11 +560,12 @@ static UNUSED int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { struct RoseContext *tctxt = context; struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("called\n"); - if (roseCatchUpMPV(tctxt->t, tctxt->state, - 
offset - scratch->core_info.buf_offset, scratch) - == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpMPV(t, scratch->core_info.state, + offset - scratch->core_info.buf_offset, + scratch) == HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("done\n"); return MO_HALT_MATCHING; } @@ -577,8 +581,8 @@ int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(tctxt->t, tctxt->curr_qi, - scratch->core_info.exhaustionVector); + return !roseSuffixIsExhausted(t, tctxt->curr_qi, + scratch->core_info.exhaustionVector); } } @@ -603,8 +607,8 @@ int roseNfaBlastAdaptorNoChain(u64a offset, ReportID id, void *context) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(tctxt->t, tctxt->curr_qi, - scratch->core_info.exhaustionVector); + return !roseSuffixIsExhausted(scratch->core_info.rose, tctxt->curr_qi, + scratch->core_info.exhaustionVector); } } @@ -627,8 +631,8 @@ int roseNfaBlastAdaptorNoInternalNoChain(u64a offset, ReportID id, return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(tctxt->t, tctxt->curr_qi, - scratch->core_info.exhaustionVector); + return !roseSuffixIsExhausted(scratch->core_info.rose, tctxt->curr_qi, + scratch->core_info.exhaustionVector); } } @@ -637,11 +641,12 @@ int roseNfaBlastSomAdaptor(u64a from_offset, u64a offset, ReportID id, void *context) { struct RoseContext *tctxt = context; struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("called\n"); - if (roseCatchUpMPV(tctxt->t, tctxt->state, - offset - scratch->core_info.buf_offset, scratch) - == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpMPV(t, scratch->core_info.state, + offset - scratch->core_info.buf_offset, + scratch) == HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("roseCatchUpNfas done\n"); return MO_HALT_MATCHING; } @@ -658,8 +663,8 @@ int roseNfaBlastSomAdaptor(u64a from_offset, u64a offset, ReportID id, return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(tctxt->t, tctxt->curr_qi, - scratch->core_info.exhaustionVector); + return !roseSuffixIsExhausted(t, tctxt->curr_qi, + scratch->core_info.exhaustionVector); } } @@ -799,7 +804,7 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, return HWLM_CONTINUE_MATCHING; } -void streamInitSufPQ(const struct RoseEngine *t, u8 *state, +void streamInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch) { assert(scratch->catchup_pq.qm_size == 0); assert(t->outfixBeginQueue != t->outfixEndQueue); @@ -844,7 +849,7 @@ void streamInitSufPQ(const struct RoseEngine *t, u8 *state, } } -void blockInitSufPQ(const struct RoseEngine *t, u8 *state, +void blockInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, char is_small_block) { DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue, t->outfixEndQueue); @@ -910,7 +915,7 @@ void blockInitSufPQ(const struct RoseEngine *t, u8 *state, * safe_loc is ??? 
*/ static rose_inline -hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, u8 *state, s64a safe_loc, +hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, s64a final_loc, struct hs_scratch *scratch) { assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue); @@ -949,7 +954,7 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, u8 *state, s64a safe_loc, s64a report_ok_loc = tctxt->minNonMpvMatchOffset + 1 - scratch->core_info.buf_offset; - hwlmcb_rv_t rv = roseCatchUpMPV(tctxt->t, state, report_ok_loc, scratch); + hwlmcb_rv_t rv = roseCatchUpMPV(t, state, report_ok_loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } @@ -986,7 +991,7 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, u8 *state, s64a safe_loc, } static never_inline -hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, u8 *state, s64a loc, +hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, char *state, s64a loc, s64a final_loc, struct hs_scratch *scratch) { struct RoseContext *tctxt = &scratch->tctxt; assert(t->activeArrayCount); @@ -1082,7 +1087,7 @@ exit:; } static really_inline -hwlmcb_rv_t roseCatchUpNfasAndMpv(const struct RoseEngine *t, u8 *state, +hwlmcb_rv_t roseCatchUpNfasAndMpv(const struct RoseEngine *t, char *state, s64a loc, s64a final_loc, struct hs_scratch *scratch) { hwlmcb_rv_t rv = roseCatchUpNfas(t, state, loc, final_loc, scratch); @@ -1098,14 +1103,14 @@ hwlmcb_rv_t roseCatchUpNfasAndMpv(const struct RoseEngine *t, u8 *state, static really_inline hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, char do_full_mpv) { - assert(scratch->tctxt.t->activeArrayCount); /* otherwise use - * roseCatchUpAnchoredOnly */ + const struct RoseEngine *t = scratch->core_info.rose; + assert(t->activeArrayCount); /* otherwise use roseCatchUpAnchoredOnly */ struct RoseContext *tctxt = &scratch->tctxt; u64a current_offset = scratch->core_info.buf_offset + loc; u64a anchored_end; ReportID anchored_report; - currentAnchoredMatch(tctxt->t, tctxt, &anchored_report, &anchored_end); + currentAnchoredMatch(t, tctxt, &anchored_report, &anchored_end); DEBUG_PRINTF("am current_offset %llu\n", current_offset); DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset); @@ -1115,7 +1120,7 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, assert(current_offset > tctxt->minMatchOffset); assert(anchored_end != ANCHORED_MATCH_SENTINEL); - hwlmcb_rv_t rv = buildSufPQ(tctxt->t, tctxt->state, + hwlmcb_rv_t rv = buildSufPQ(t, scratch->core_info.state, anchored_end - scratch->core_info.buf_offset, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { @@ -1123,9 +1128,9 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, } /* buildSufPQ may have caught only part of the pq upto anchored_end */ - rv = roseCatchUpNfas(tctxt->t, tctxt->state, - anchored_end - scratch->core_info.buf_offset, loc, - scratch); + rv = roseCatchUpNfas(t, scratch->core_info.state, + anchored_end - scratch->core_info.buf_offset, loc, + scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; @@ -1134,7 +1139,7 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, while (anchored_report != MO_INVALID_IDX && anchored_end <= current_offset) { if (anchored_end != tctxt->minMatchOffset) { - rv = roseCatchUpNfasAndMpv(tctxt->t, tctxt->state, + rv = roseCatchUpNfasAndMpv(t, scratch->core_info.state, anchored_end - scratch->core_info.buf_offset, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { @@ -1156,28 +1161,28 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, 
struct hs_scratch *scratch, return HWLM_TERMINATE_MATCHING; } next: - nextAnchoredMatch(tctxt->t, tctxt, &anchored_report, &anchored_end); + nextAnchoredMatch(t, tctxt, &anchored_report, &anchored_end); DEBUG_PRINTF("catch up %u %llu\n", anchored_report, anchored_end); } if (current_offset == tctxt->minMatchOffset) { DEBUG_PRINTF("caught up\n"); - assert(scratch->catchup_pq.qm_size <= tctxt->t->outfixEndQueue); + assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue); return HWLM_CONTINUE_MATCHING; } - rv = roseCatchUpNfas(tctxt->t, tctxt->state, loc, loc, scratch); + rv = roseCatchUpNfas(t, scratch->core_info.state, loc, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } - assert(scratch->catchup_pq.qm_size <= tctxt->t->outfixEndQueue + assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue || rv == HWLM_TERMINATE_MATCHING); if (do_full_mpv) { /* finish off any outstanding chained matches */ - rv = roseCatchUpMPV(tctxt->t, tctxt->state, loc, scratch); + rv = roseCatchUpMPV(t, scratch->core_info.state, loc, scratch); } DEBUG_PRINTF("catchup all done %llu\n", current_offset); @@ -1201,24 +1206,22 @@ hwlmcb_rv_t roseCatchUpSufAndChains(s64a loc, struct hs_scratch *scratch) { assert(scratch->core_info.buf_offset + loc > scratch->tctxt.minNonMpvMatchOffset); - hwlmcb_rv_t rv = buildSufPQ(scratch->tctxt.t, scratch->tctxt.state, loc, - loc, scratch); + const struct RoseEngine *t = scratch->core_info.rose; + char *state = scratch->core_info.state; + + hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } - rv = roseCatchUpNfas(scratch->tctxt.t, scratch->tctxt.state, loc, loc, - scratch); - + rv = roseCatchUpNfas(t, state, loc, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } - rv = roseCatchUpMPV(scratch->tctxt.t, scratch->tctxt.state, loc, scratch); - + rv = roseCatchUpMPV(t, state, loc, scratch); assert(rv != HWLM_CONTINUE_MATCHING - || scratch->catchup_pq.qm_size <= scratch->tctxt.t->outfixEndQueue); - + || scratch->catchup_pq.qm_size <= t->outfixEndQueue); return rv; } @@ -1228,29 +1231,31 @@ hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch) { assert(scratch->core_info.buf_offset + loc > scratch->tctxt.minNonMpvMatchOffset); - hwlmcb_rv_t rv = buildSufPQ(scratch->tctxt.t, scratch->tctxt.state, loc, - loc, scratch); + const struct RoseEngine *t = scratch->core_info.rose; + char *state = scratch->core_info.state; + + hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } - rv = roseCatchUpNfas(scratch->tctxt.t, scratch->tctxt.state, loc, loc, - scratch); - assert(rv != HWLM_CONTINUE_MATCHING - || scratch->catchup_pq.qm_size <= scratch->tctxt.t->outfixEndQueue); + rv = roseCatchUpNfas(t, state, loc, loc, scratch); + assert(rv != HWLM_CONTINUE_MATCHING || + scratch->catchup_pq.qm_size <= t->outfixEndQueue); return rv; } hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch) { + const struct RoseEngine *t = scratch->core_info.rose; struct RoseContext *tctxt = &scratch->tctxt; - assert(!tctxt->t->activeArrayCount); /* otherwise use roseCatchUpAll */ + assert(!t->activeArrayCount); /* otherwise use roseCatchUpAll */ u64a current_offset = scratch->core_info.buf_offset + loc; u64a anchored_end; ReportID anchored_report; - currentAnchoredMatch(tctxt->t, tctxt, &anchored_report, &anchored_end); + currentAnchoredMatch(t, tctxt, &anchored_report, &anchored_end); DEBUG_PRINTF("am current_offset %llu\n", current_offset); @@ -1272,7 +1277,7 
@@ hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch) { return HWLM_TERMINATE_MATCHING; } next: - nextAnchoredMatch(tctxt->t, tctxt, &anchored_report, &anchored_end); + nextAnchoredMatch(t, tctxt, &anchored_report, &anchored_end); DEBUG_PRINTF("catch up %u %llu\n", anchored_report, anchored_end); } diff --git a/src/rose/catchup.h b/src/rose/catchup.h index b54a49b0f..bbbaa987d 100644 --- a/src/rose/catchup.h +++ b/src/rose/catchup.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,16 +73,16 @@ hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch); hwlmcb_rv_t roseCatchUpAnchoredAndSuf(s64a loc, struct hs_scratch *scratch); -hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, u8 *state, s64a loc, +hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, char *state, s64a loc, struct hs_scratch *scratch); -void blockInitSufPQ(const struct RoseEngine *t, u8 *state, +void blockInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, char is_small_block); -void streamInitSufPQ(const struct RoseEngine *t, u8 *state, +void streamInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch); static really_inline -hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, u8 *state, +hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, char *state, s64a loc, struct hs_scratch *scratch) { u64a cur_offset = loc + scratch->core_info.buf_offset; assert(cur_offset >= scratch->tctxt.minMatchOffset); @@ -140,7 +140,7 @@ u64a currentAnchoredEnd(const struct RoseEngine *t, struct RoseContext *tctxt) { /* catches up nfas, anchored matches and the mpv */ static rose_inline -hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, u8 *state, u64a end, +hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, char *state, u64a end, struct hs_scratch *scratch, char in_anchored) { /* no need to catch up if we are at the same offset as last time */ if (end <= scratch->tctxt.minMatchOffset) { @@ -156,7 +156,6 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, u8 *state, u64a end, return roseCatchUpMPV(t, state, loc, scratch); } - assert(t == scratch->tctxt.t); assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset); u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt); hwlmcb_rv_t rv; @@ -189,7 +188,7 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, u8 *state, u64a end, * and suf/outfixes. The MPV will be run only to intersperse matches in * the output match stream if external matches are raised. 
*/ static rose_inline -hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, u8 *state, +hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, char *state, u64a end, struct hs_scratch *scratch, char in_anchored) { /* no need to catch up if we are at the same offset as last time */ @@ -201,7 +200,6 @@ hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, u8 *state, s64a loc = end - scratch->core_info.buf_offset; - assert(t == scratch->tctxt.t); assert(t->activeArrayCount); /* mpv is in active array */ assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset); u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt); diff --git a/src/rose/eod.c b/src/rose/eod.c index b95a952e4..1fa2c6dcc 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -33,11 +33,10 @@ #include "util/fatbit.h" static really_inline -void initContext(const struct RoseEngine *t, u8 *state, u64a offset, +void initContext(const struct RoseEngine *t, char *state, u64a offset, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback, void *ctx) { struct RoseContext *tctxt = &scratch->tctxt; - tctxt->t = t; tctxt->groups = loadGroups(t, state); /* TODO: diff groups for eod */ tctxt->lit_offset_adjust = scratch->core_info.buf_offset - scratch->core_info.hlen @@ -45,7 +44,6 @@ void initContext(const struct RoseEngine *t, u8 *state, u64a offset, tctxt->delayLastEndOffset = offset; tctxt->lastEndOffset = offset; tctxt->filledDelayedSlots = 0; - tctxt->state = state; tctxt->cb = callback; tctxt->cb_som = som_callback; tctxt->userCtx = ctx; @@ -128,7 +126,7 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset, * or outfix) NFAs. */ static rose_inline -void roseCheckNfaEod(const struct RoseEngine *t, u8 *state, +void roseCheckNfaEod(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, u64a offset, const char is_streaming) { if (!t->eodNfaIterOffset) { @@ -176,8 +174,8 @@ void roseCheckNfaEod(const struct RoseEngine *t, u8 *state, } static rose_inline -void cleanupAfterEodMatcher(const struct RoseEngine *t, u8 *state, u64a offset, - struct hs_scratch *scratch) { +void cleanupAfterEodMatcher(const struct RoseEngine *t, char *state, + u64a offset, struct hs_scratch *scratch) { struct RoseContext *tctxt = &scratch->tctxt; // Flush history to make sure it's consistent. 
@@ -185,7 +183,7 @@ void cleanupAfterEodMatcher(const struct RoseEngine *t, u8 *state, u64a offset, } static rose_inline -void roseCheckEodSuffixes(const struct RoseEngine *t, u8 *state, u64a offset, +void roseCheckEodSuffixes(const struct RoseEngine *t, char *state, u64a offset, struct hs_scratch *scratch) { const u8 *aa = getActiveLeafArray(t, state); const u32 aaCount = t->activeArrayCount; @@ -243,7 +241,7 @@ int roseRunEodProgram(const struct RoseEngine *t, u64a offset, } static really_inline -void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, +void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, struct hs_scratch *scratch, const char is_streaming) { assert(t); assert(scratch->core_info.buf || scratch->core_info.hbuf); @@ -289,10 +287,9 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, } } -void roseEodExec(const struct RoseEngine *t, u8 *state, u64a offset, +void roseEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback, void *context) { - assert(state); assert(scratch); assert(callback); assert(context); @@ -308,13 +305,16 @@ void roseEodExec(const struct RoseEngine *t, u8 *state, u64a offset, return; } + char *state = scratch->core_info.state; + assert(state); + initContext(t, state, offset, scratch, callback, som_callback, context); roseEodExec_i(t, state, offset, scratch, 1); } static rose_inline -void prepForEod(const struct RoseEngine *t, u8 *state, size_t length, +void prepForEod(const struct RoseEngine *t, char *state, size_t length, struct RoseContext *tctxt) { roseFlushLastByteHistory(t, state, length, tctxt); tctxt->lastEndOffset = length; @@ -328,7 +328,7 @@ void roseBlockEodExec(const struct RoseEngine *t, u64a offset, assert(!can_stop_matching(scratch)); - u8 *state = (u8 *)scratch->core_info.state; + char *state = scratch->core_info.state; // Ensure that history is correct before we look for EOD matches prepForEod(t, state, scratch->core_info.len, &scratch->tctxt); diff --git a/src/rose/init.c b/src/rose/init.c index 1cb26821e..1ec520c33 100644 --- a/src/rose/init.c +++ b/src/rose/init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,7 +42,7 @@ #include static really_inline -void init_rstate(const struct RoseEngine *t, u8 *state) { +void init_rstate(const struct RoseEngine *t, char *state) { // Set runtime state: we take our initial groups from the RoseEngine. 
DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups); struct RoseRuntimeState *rstate = getRuntimeState(state); @@ -52,7 +52,7 @@ void init_rstate(const struct RoseEngine *t, u8 *state) { } static really_inline -void init_outfixes(const struct RoseEngine *t, u8 *state) { +void init_outfixes(const struct RoseEngine *t, char *state) { /* The active leaf array has been init'ed by the scatter with outfix * bits set on */ @@ -74,7 +74,7 @@ void init_outfixes(const struct RoseEngine *t, u8 *state) { } } -void roseInitState(const struct RoseEngine *t, u8 *state) { +void roseInitState(const struct RoseEngine *t, char *state) { assert(t); assert(state); diff --git a/src/rose/init.h b/src/rose/init.h index 9aee05865..b37053b26 100644 --- a/src/rose/init.h +++ b/src/rose/init.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,7 +39,7 @@ */ static really_inline -void init_state(const struct RoseEngine *t, u8 *state) { +void init_state(const struct RoseEngine *t, char *state) { scatter(state, t, &t->state_init); } diff --git a/src/rose/match.c b/src/rose/match.c index 72f2a167c..f614423b7 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -75,8 +75,8 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, void *ctx) { struct hs_scratch *scratch = ctx; struct RoseContext *tctx = &scratch->tctxt; - const struct RoseEngine *t = tctx->t; struct core_info *ci = &scratch->core_info; + const struct RoseEngine *t = ci->rose; size_t rb_len = MIN(ci->hlen, t->delayRebuildLength); u64a real_end = ci->buf_offset - rb_len + end + 1; // index after last byte @@ -123,8 +123,8 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, static rose_inline void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId, u64a end) { - const struct RoseEngine *t = tctxt->t; struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct RoseEngine *t = scratch->core_info.rose; u8 **anchoredRows = getAnchoredLog(scratch); DEBUG_PRINTF("record %u @ %llu\n", reportId, end); @@ -148,8 +148,8 @@ static rose_inline void recordAnchoredLiteralMatch(struct RoseContext *tctxt, u32 literal_id, u64a end) { assert(end); - const struct RoseEngine *t = tctxt->t; struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct RoseEngine *t = scratch->core_info.rose; u8 **anchoredLiteralRows = getAnchoredLiteralLog(scratch); DEBUG_PRINTF("record %u @ %llu\n", literal_id, end); @@ -173,7 +173,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, struct hs_scratch *scratch = tctxtToScratch(tctxt); struct core_info *ci = &scratch->core_info; - u8 *aa = getActiveLeafArray(t, tctxt->state); + u8 *aa = getActiveLeafArray(t, scratch->core_info.state); u32 aaCount = t->activeArrayCount; struct fatbit *activeQueues = scratch->aqa; u32 qCount = t->queueCount; @@ -255,9 +255,9 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, /* handles catchup, som, cb, etc */ static really_inline -hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, u8 *state, - struct RoseContext *tctxt, ReportID id, u64a offset, - char in_anchored) { +hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, char *state, + struct RoseContext *tctxt, ReportID id, + u64a offset, char in_anchored) { const struct internal_report *ri = getInternalReport(t, id); 
if (ri) { @@ -284,7 +284,8 @@ hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, u8 *state, static really_inline hwlmcb_rv_t roseHandleAnchoredDirectReport(const struct RoseEngine *t, - u8 *state, struct RoseContext *tctxt, + char *state, + struct RoseContext *tctxt, u64a real_end, ReportID report) { DEBUG_PRINTF("direct report %u, real_end=%llu\n", report, real_end); @@ -300,9 +301,9 @@ hwlmcb_rv_t roseHandleAnchoredDirectReport(const struct RoseEngine *t, int roseAnchoredCallback(u64a end, u32 id, void *ctx) { struct RoseContext *tctxt = ctx; - const struct RoseEngine *t = tctxt->t; - u8 *state = tctxt->state; struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + char *state = ci->state; + const struct RoseEngine *t = ci->rose; u64a real_end = ci->buf_offset + end; // index after last byte @@ -387,7 +388,8 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, struct RoseContext *tctxt, char in_delay_play, char in_anch_playback) { /* assert(!tctxt->in_anchored); */ - u8 *state = tctxt->state; + struct hs_scratch *scratch = tctxtToScratch(tctxt); + char *state = scratch->core_info.state; DEBUG_PRINTF("id=%u\n", id); @@ -444,30 +446,31 @@ hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end, } static rose_inline -hwlmcb_rv_t playDelaySlot(struct RoseContext *tctxt, const u8 *delaySlotBase, - size_t delaySlotSize, u32 vicIndex, u64a offset) { +hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt, + const u8 *delaySlotBase, size_t delaySlotSize, + u32 vicIndex, u64a offset) { /* assert(!tctxt->in_anchored); */ assert(vicIndex < DELAY_SLOT_COUNT); const u8 *vicSlot = delaySlotBase + delaySlotSize * vicIndex; - u32 delay_count = tctxt->t->delay_count; + u32 delay_count = t->delay_count; - if (offset < tctxt->t->floatingMinLiteralMatchOffset) { + if (offset < t->floatingMinLiteralMatchOffset) { DEBUG_PRINTF("too soon\n"); return HWLM_CONTINUE_MATCHING; } - roseFlushLastByteHistory(tctxt->t, tctxt->state, offset, tctxt); + struct hs_scratch *scratch = tctxtToScratch(tctxt); + roseFlushLastByteHistory(t, scratch->core_info.state, offset, tctxt); tctxt->lastEndOffset = offset; for (u32 it = mmbit_iterate(vicSlot, delay_count, MMB_INVALID); it != MMB_INVALID; it = mmbit_iterate(vicSlot, delay_count, it)) { - u32 literal_id = tctxt->t->delay_base_id + it; + u32 literal_id = t->delay_base_id + it; UNUSED rose_group old_groups = tctxt->groups; DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", literal_id, offset); - hwlmcb_rv_t rv = roseProcessDelayedMatch(tctxt->t, offset, literal_id, - tctxt); + hwlmcb_rv_t rv = roseProcessDelayedMatch(t, offset, literal_id, tctxt); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* delayed literals can't safely set groups. 
@@ -485,20 +488,21 @@ hwlmcb_rv_t playDelaySlot(struct RoseContext *tctxt, const u8 *delaySlotBase, } static really_inline -hwlmcb_rv_t flushAnchoredLiteralAtLoc(struct RoseContext *tctxt, u32 curr_loc) { +hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, + struct RoseContext *tctxt, u32 curr_loc) { u8 *curr_row = getAnchoredLiteralLog(tctxtToScratch(tctxt))[curr_loc - 1]; - u32 region_width = tctxt->t->anchored_count; + u32 region_width = t->anchored_count; DEBUG_PRINTF("report matches at curr loc\n"); for (u32 it = mmbit_iterate(curr_row, region_width, MMB_INVALID); it != MMB_INVALID; it = mmbit_iterate(curr_row, region_width, it)) { DEBUG_PRINTF("it = %u/%u\n", it, region_width); - u32 literal_id = tctxt->t->anchored_base_id + it; + u32 literal_id = t->anchored_base_id + it; rose_group old_groups = tctxt->groups; DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", literal_id, curr_loc); - hwlmcb_rv_t rv = roseProcessDelayedAnchoredMatch(tctxt->t, curr_loc, + hwlmcb_rv_t rv = roseProcessDelayedAnchoredMatch(t, curr_loc, literal_id, tctxt); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); @@ -534,9 +538,11 @@ u32 anchored_it_begin(struct RoseContext *tctxt) { } static really_inline -hwlmcb_rv_t flushAnchoredLiterals(struct RoseContext *tctxt, +hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t, + struct RoseContext *tctxt, u32 *anchored_it_param, u64a to_off) { struct hs_scratch *scratch = tctxtToScratch(tctxt); + char *state = scratch->core_info.state; u32 anchored_it = *anchored_it_param; /* catch up any remaining anchored matches */ for (; anchored_it != MMB_INVALID && anchored_it < to_off; @@ -544,10 +550,10 @@ hwlmcb_rv_t flushAnchoredLiterals(struct RoseContext *tctxt, assert(anchored_it < scratch->anchored_literal_region_len); DEBUG_PRINTF("loc_it = %u\n", anchored_it); u32 curr_off = anchored_it + 1; - roseFlushLastByteHistory(tctxt->t, tctxt->state, curr_off, tctxt); + roseFlushLastByteHistory(t, state, curr_off, tctxt); tctxt->lastEndOffset = curr_off; - if (flushAnchoredLiteralAtLoc(tctxt, curr_off) + if (flushAnchoredLiteralAtLoc(t, tctxt, curr_off) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -558,9 +564,9 @@ hwlmcb_rv_t flushAnchoredLiterals(struct RoseContext *tctxt, } static really_inline -hwlmcb_rv_t playVictims(struct RoseContext *tctxt, u32 *anchored_it, - u64a lastEnd, u64a victimDelaySlots, u8 *delaySlotBase, - size_t delaySlotSize) { +hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct RoseContext *tctxt, + u32 *anchored_it, u64a lastEnd, u64a victimDelaySlots, + u8 *delaySlotBase, size_t delaySlotSize) { /* assert (!tctxt->in_anchored); */ while (victimDelaySlots) { @@ -568,12 +574,12 @@ hwlmcb_rv_t playVictims(struct RoseContext *tctxt, u32 *anchored_it, DEBUG_PRINTF("vic = %u\n", vic); u64a vicOffset = vic + (lastEnd & ~(u64a)DELAY_MASK); - if (flushAnchoredLiterals(tctxt, anchored_it, vicOffset) + if (flushAnchoredLiterals(t, tctxt, anchored_it, vicOffset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - if (playDelaySlot(tctxt, delaySlotBase, delaySlotSize, + if (playDelaySlot(t, tctxt, delaySlotBase, delaySlotSize, vic % DELAY_SLOT_COUNT, vicOffset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; @@ -585,6 +591,9 @@ hwlmcb_rv_t playVictims(struct RoseContext *tctxt, u32 *anchored_it, /* call flushQueuedLiterals instead */ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct 
RoseEngine *t = scratch->core_info.rose; + /* assert(!tctxt->in_anchored); */ u64a lastEnd = tctxt->delayLastEndOffset; DEBUG_PRINTF("flushing backed up matches @%llu up from %llu\n", currEnd, @@ -600,8 +609,8 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) { } { - u8 *delaySlotBase = getDelaySlots(tctxtToScratch(tctxt)); - size_t delaySlotSize = tctxt->t->delay_slot_size; + u8 *delaySlotBase = getDelaySlots(scratch); + size_t delaySlotSize = t->delay_slot_size; u32 lastIndex = lastEnd & DELAY_MASK; u32 currIndex = currEnd & DELAY_MASK; @@ -654,7 +663,7 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) { second_half, victimDelaySlots, lastIndex); } - if (playVictims(tctxt, &anchored_it, lastEnd, victimDelaySlots, + if (playVictims(t, tctxt, &anchored_it, lastEnd, victimDelaySlots, delaySlotBase, delaySlotSize) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; @@ -662,27 +671,29 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) { } anchored_leftovers:; - hwlmcb_rv_t rv = flushAnchoredLiterals(tctxt, &anchored_it, currEnd); + hwlmcb_rv_t rv = flushAnchoredLiterals(t, tctxt, &anchored_it, currEnd); tctxt->delayLastEndOffset = currEnd; return rv; } hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { struct RoseContext *tctx = ctxt; + struct hs_scratch *scratch = tctxtToScratch(tctx); + const struct RoseEngine *t = scratch->core_info.rose; + u64a real_end = end + tctx->lit_offset_adjust; #if defined(DEBUG) - struct core_info *ci = &tctxtToScratch(tctx)->core_info; DEBUG_PRINTF("MATCH id=%u offsets=[%llu,%llu]: ", id, start + tctx->lit_offset_adjust, real_end); - printMatch(ci, start + tctx->lit_offset_adjust, real_end); + printMatch(&scratch->core_info, start + tctx->lit_offset_adjust, real_end); printf("\n"); #endif DEBUG_PRINTF("last end %llu\n", tctx->lastEndOffset); DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); - if (can_stop_matching(tctxtToScratch(tctx))) { + if (can_stop_matching(scratch)) { DEBUG_PRINTF("received a match when we're already dead!\n"); return HWLM_TERMINATE_MATCHING; } @@ -690,8 +701,8 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { hwlmcb_rv_t rv = flushQueuedLiterals(tctx, real_end); /* flushDelayed may have advanced tctx->lastEndOffset */ - if (real_end >= tctx->t->floatingMinLiteralMatchOffset) { - roseFlushLastByteHistory(tctx->t, tctx->state, real_end, tctx); + if (real_end >= t->floatingMinLiteralMatchOffset) { + roseFlushLastByteHistory(t, scratch->core_info.state, real_end, tctx); tctx->lastEndOffset = real_end; } @@ -700,7 +711,7 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { } size_t match_len = end - start + 1; - rv = roseProcessMainMatch(tctx->t, real_end, match_len, id, tctx); + rv = roseProcessMainMatch(t, real_end, match_len, id, tctx); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); @@ -708,7 +719,7 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { return tctx->groups; } - assert(can_stop_matching(tctxtToScratch(tctx))); + assert(can_stop_matching(scratch)); DEBUG_PRINTF("user requested halt\n"); return HWLM_TERMINATE_MATCHING; } diff --git a/src/rose/match.h b/src/rose/match.h index cab172673..f3b8fe73c 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * 
modification, are permitted provided that the following conditions are met: @@ -90,7 +90,7 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, q->end = 0; q->cur = 0; q->state = scratch->fullState + info->fullStateOffset; - q->streamState = (char *)tctxt->state + info->stateOffset; + q->streamState = scratch->core_info.state + info->stateOffset; q->offset = scratch->core_info.buf_offset; q->buffer = scratch->core_info.buf; q->length = scratch->core_info.len; @@ -129,7 +129,7 @@ void initRoseQueue(const struct RoseEngine *t, u32 qi, if (left->transient) { q->streamState = (char *)scratch->tstate + info->stateOffset; } else { - q->streamState = (char *)tctxt->state + info->stateOffset; + q->streamState = scratch->core_info.state + info->stateOffset; } q->offset = scratch->core_info.buf_offset; @@ -161,7 +161,7 @@ void loadStreamState(const struct NFA *nfa, struct mq *q, s64a loc) { } static really_inline -void storeRoseDelay(const struct RoseEngine *t, u8 *state, +void storeRoseDelay(const struct RoseEngine *t, char *state, const struct LeftNfaInfo *left, u32 loc) { u32 di = left->lagIndex; if (di == ROSE_OFFSET_INVALID) { @@ -176,7 +176,7 @@ void storeRoseDelay(const struct RoseEngine *t, u8 *state, } static really_inline -void setAsZombie(const struct RoseEngine *t, u8 *state, +void setAsZombie(const struct RoseEngine *t, char *state, const struct LeftNfaInfo *left) { u32 di = left->lagIndex; assert(di != ROSE_OFFSET_INVALID); @@ -191,7 +191,7 @@ void setAsZombie(const struct RoseEngine *t, u8 *state, /* loadRoseDelay MUST NOT be called on the first stream write as it is only * initialized for running nfas on stream boundaries */ static really_inline -u32 loadRoseDelay(const struct RoseEngine *t, const u8 *state, +u32 loadRoseDelay(const struct RoseEngine *t, const char *state, const struct LeftNfaInfo *left) { u32 di = left->lagIndex; if (di == ROSE_OFFSET_INVALID) { @@ -205,7 +205,7 @@ u32 loadRoseDelay(const struct RoseEngine *t, const u8 *state, } static really_inline -char isZombie(const struct RoseEngine *t, const u8 *state, +char isZombie(const struct RoseEngine *t, const char *state, const struct LeftNfaInfo *left) { u32 di = left->lagIndex; assert(di != ROSE_OFFSET_INVALID); @@ -261,7 +261,7 @@ hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, struct RoseContext *tctxt } static rose_inline -void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, +void roseFlushLastByteHistory(const struct RoseEngine *t, char *state, u64a currEnd, struct RoseContext *tctxt) { if (!t->lastByteHistoryIterOffset) { return; @@ -292,7 +292,7 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, } static rose_inline -int roseHasInFlightMatches(const struct RoseEngine *t, u8 *state, +int roseHasInFlightMatches(const struct RoseEngine *t, char *state, const struct hs_scratch *scratch) { if (scratch->al_log_sum) { DEBUG_PRINTF("anchored literals in log\n"); diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index b4d4aeeed..e8e60c7ff 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -46,18 +46,14 @@ #include "util/multibit.h" static rose_inline -int roseCheckBenefits(struct RoseContext *tctxt, u64a end, u32 mask_rewind, +int roseCheckBenefits(const struct core_info *ci, u64a end, u32 mask_rewind, const u8 *and_mask, const u8 *exp_mask) { - DEBUG_PRINTF("am offset = %zu, em offset = %zu\n", - and_mask - (const u8 *)tctxt->t, - exp_mask - (const u8 *)tctxt->t); const u8 *data; // If the check works over part of the 
history and part of the buffer, we // create a temporary copy of the data in here so it's contiguous. u8 temp[MAX_MASK2_WIDTH]; - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; s64a buffer_offset = (s64a)end - ci->buf_offset; DEBUG_PRINTF("rel offset %lld\n", buffer_offset); if (buffer_offset >= mask_rewind) { @@ -215,7 +211,7 @@ hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, char is_mpv, char in_anchored, char in_catchup) { struct RoseContext *tctxt = &scratch->tctxt; - u8 *aa = getActiveLeafArray(t, tctxt->state); + u8 *aa = getActiveLeafArray(t, scratch->core_info.state); struct fatbit *activeQueues = scratch->aqa; u32 aaCount = t->activeArrayCount; u32 qCount = t->queueCount; @@ -237,17 +233,17 @@ hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, if (loc + scratch->core_info.buf_offset <= tctxt->minNonMpvMatchOffset) { DEBUG_PRINTF("flushing chained\n"); - if (roseCatchUpMPV(t, tctxt->state, loc, scratch) - == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpMPV(t, scratch->core_info.state, loc, + scratch) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } goto done_queue_empty; } } - if (roseCatchUpTo(t, tctxt->state, loc + scratch->core_info.buf_offset, - scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpTo(t, scratch->core_info.state, + loc + scratch->core_info.buf_offset, scratch, + in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } else { @@ -255,7 +251,7 @@ hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, assert(is_mpv); DEBUG_PRINTF("flushing chained\n"); tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (roseCatchUpMPV(t, tctxt->state, loc, scratch) + if (roseCatchUpMPV(t, scratch->core_info.state, loc, scratch) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -296,8 +292,8 @@ hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, char in_anchored) { DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); - u8 *aa = getActiveLeafArray(t, tctxt->state); struct hs_scratch *scratch = tctxtToScratch(tctxt); + u8 *aa = getActiveLeafArray(t, scratch->core_info.state); const u32 aaCount = t->activeArrayCount; const u32 qCount = t->queueCount; struct mq *q = &scratch->queues[qi]; @@ -374,10 +370,12 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, assert(leftfixLag <= left->maxLag); struct mq *q = scratch->queues + qi; + char *state = scratch->core_info.state; + u8 *activeLeftArray = getActiveLeftArray(t, state); u32 qCount = t->queueCount; u32 arCount = t->activeLeftCount; - if (!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, ri)) { + if (!mmbit_isset(activeLeftArray, arCount, ri)) { DEBUG_PRINTF("engine is dead nothing to see here\n"); return 0; } @@ -389,7 +387,7 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset && !fatbit_isset(scratch->aqa, qCount, qi) - && isZombie(t, tctxt->state, left)) { + && isZombie(t, state, left)) { DEBUG_PRINTF("zombie\n"); return 1; } @@ -402,7 +400,7 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, if (left->transient) { sp = -(s32)ci->hlen; } else { - sp = -(s32)loadRoseDelay(t, tctxt->state, left); + sp = -(s32)loadRoseDelay(t, state, left); } /* transient nfas are always started fresh -> state not maintained @@ -441,7 +439,7 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, if (infixTooOld(q, loc)) { DEBUG_PRINTF("infix %u died of 
old age\n", ri); scratch->tctxt.groups &= left->squash_mask; - mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); + mmbit_unset(activeLeftArray, arCount, ri); return 0; } @@ -451,7 +449,7 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, if (!rosePrefixCheckMiracles(t, left, ci, q, end)) { DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); scratch->tctxt.groups &= left->squash_mask; - mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); + mmbit_unset(activeLeftArray, arCount, ri); return 0; } @@ -464,9 +462,8 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); if (!rv) { /* nfa is dead */ DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); - mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); - assert(!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, - ri)); + mmbit_unset(activeLeftArray, arCount, ri); + assert(!mmbit_isset(activeLeftArray, arCount, ri)); tctxt->groups &= left->squash_mask; return 0; } @@ -503,7 +500,9 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, struct mq *q = tctxtToScratch(tctxt)->queues + qi; const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state); + struct hs_scratch *scratch = tctxtToScratch(tctxt); + char *state = scratch->core_info.state; + u8 *activeLeftArray = getActiveLeftArray(t, state); const u32 arCount = t->activeLeftCount; char alive = mmbit_set(activeLeftArray, arCount, ri); @@ -512,11 +511,11 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, return; } - struct fatbit *aqa = tctxtToScratch(tctxt)->aqa; + struct fatbit *aqa = scratch->aqa; const u32 qCount = t->queueCount; if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && - !fatbit_isset(aqa, qCount, qi) && isZombie(t, tctxt->state, left)) { + !fatbit_isset(aqa, qCount, qi) && isZombie(t, state, left)) { DEBUG_PRINTF("yawn - zombie\n"); return; } @@ -531,7 +530,7 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, DEBUG_PRINTF("initing %u\n", qi); initRoseQueue(t, qi, left, tctxt); if (alive) { - s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left); + s32 sp = -(s32)loadRoseDelay(t, state, left); pushQueueAt(q, 0, MQE_START, sp); loadStreamState(q->nfa, q, sp); } else { @@ -562,8 +561,8 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, /* handles the firing of external matches */ static rose_inline -hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, u8 *state, ReportID id, - u64a end, struct RoseContext *tctxt, +hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, char *state, + ReportID id, u64a end, struct RoseContext *tctxt, char in_anchored) { struct hs_scratch *scratch = tctxtToScratch(tctxt); @@ -603,7 +602,7 @@ hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, u8 *state, ReportID id, * up */ static rose_inline hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, - u8 *state, ReportID r, u64a end, + char *state, ReportID r, u64a end, struct RoseContext *tctxt, char in_anchored) { struct hs_scratch *scratch = tctxtToScratch(tctxt); @@ -617,7 +616,7 @@ hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, } static rose_inline -hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, u8 *state, u64a end, +hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, char *state, u64a end, 
struct RoseContext *tctxt, char in_anchored) { struct hs_scratch *scratch = tctxtToScratch(tctxt); @@ -640,7 +639,7 @@ hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, u8 *state, u64a end, } static really_inline -hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, u8 *state, ReportID id, +hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, char *state, ReportID id, u64a end, struct RoseContext *tctxt, char in_anchored) { struct hs_scratch *scratch = tctxtToScratch(tctxt); @@ -663,7 +662,7 @@ hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, u8 *state, ReportID id, } static rose_inline -hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, u8 *state, +hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state, ReportID id, u64a start, u64a end, struct RoseContext *tctxt, char in_anchored) { if (roseCatchUpTo(t, state, end, tctxtToScratch(tctxt), in_anchored) @@ -701,9 +700,9 @@ hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, u8 *state, } static rose_inline -hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, u8 *state, ReportID id, - u64a start, u64a end, struct RoseContext *tctxt, - char in_anchored) { +hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, char *state, + ReportID id, u64a start, u64a end, + struct RoseContext *tctxt, char in_anchored) { DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n", id, start, end, tctxt->minMatchOffset); @@ -903,6 +902,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, // allow the program to squash groups). int work_done = 0; + struct hs_scratch *scratch = tctxtToScratch(tctxt); + assert(*(const u8 *)pc != ROSE_INSTR_END); for (;;) { @@ -925,7 +926,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_CASE(CHECK_LIT_MASK) { assert(match_len); - if (!roseCheckBenefits(tctxt, end, match_len, ri->and_mask.a8, + struct core_info *ci = &scratch->core_info; + if (!roseCheckBenefits(ci, end, match_len, ri->and_mask.a8, ri->cmp_mask.a8)) { DEBUG_PRINTF("halt: failed mask check\n"); return HWLM_CONTINUE_MATCHING; @@ -953,7 +955,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_ONLY_EOD) { - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + struct core_info *ci = &scratch->core_info; if (end != ci->buf_offset + ci->len) { DEBUG_PRINTF("should only match at end of data\n"); assert(ri->fail_jump); // must progress @@ -975,7 +977,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_NOT_HANDLED) { - struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles; + struct fatbit *handled = scratch->handled_roles; if (fatbit_set(handled, t->handledKeyCount, ri->key)) { DEBUG_PRINTF("key %u already set\n", ri->key); assert(ri->fail_jump); // must progress @@ -1042,7 +1044,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT) { - if (roseHandleMatch(t, tctxt->state, ri->report, end, tctxt, + if (roseHandleMatch(t, scratch->core_info.state, + ri->report, end, tctxt, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -1051,9 +1054,9 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_CHAIN) { - if (roseCatchUpAndHandleChainMatch(t, tctxt->state, ri->report, - end, tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { + if 
(roseCatchUpAndHandleChainMatch( + t, scratch->core_info.state, ri->report, end, + tctxt, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1070,7 +1073,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_INT) { - if (roseHandleSom(t, tctxt->state, ri->report, end, tctxt, + if (roseHandleSom(t, scratch->core_info.state, ri->report, + end, tctxt, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -1079,8 +1083,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM) { - if (roseHandleSomSom(t, tctxt->state, ri->report, som, end, - tctxt, + if (roseHandleSomSom(t, scratch->core_info.state, + ri->report, som, end, tctxt, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -1089,8 +1093,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_KNOWN) { - if (roseHandleSomMatch(t, tctxt->state, ri->report, som, end, - tctxt, in_anchored) == + if (roseHandleSomMatch(t, scratch->core_info.state, ri->report, + som, end, tctxt, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -1100,8 +1104,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_CASE(SET_STATE) { DEBUG_PRINTF("set state index %u\n", ri->index); - mmbit_set(getRoleState(tctxt->state), t->rolesWithStateCount, - ri->index); + mmbit_set(getRoleState(scratch->core_info.state), + t->rolesWithStateCount, ri->index); work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -1125,8 +1129,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_CASE(CHECK_STATE) { DEBUG_PRINTF("check state %u\n", ri->index); - if (!mmbit_isset(getRoleState(tctxt->state), - t->rolesWithStateCount, ri->index)) { + const u8 *roles = getRoleState(scratch->core_info.state); + if (!mmbit_isset(roles, t->rolesWithStateCount, ri->index)) { DEBUG_PRINTF("state not on\n"); assert(ri->fail_jump); // must progress pc += ri->fail_jump; @@ -1141,10 +1145,11 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, getByOffset(t, ri->iter_offset); assert(ISALIGNED(it)); + const u8 *roles = getRoleState(scratch->core_info.state); + u32 idx = 0; - u32 i = mmbit_sparse_iter_begin(getRoleState(tctxt->state), - t->rolesWithStateCount, &idx, - it, si_state); + u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, + &idx, it, si_state); if (i == MMB_INVALID) { DEBUG_PRINTF("no states in sparse iter are on\n"); assert(ri->fail_jump); // must progress @@ -1152,7 +1157,6 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, continue; } - struct hs_scratch *scratch = tctxtToScratch(tctxt); fatbit_clear(scratch->handled_roles); const u32 *jumps = getByOffset(t, ri->jump_table); @@ -1170,9 +1174,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, getByOffset(t, ri->iter_offset); assert(ISALIGNED(it)); + const u8 *roles = getRoleState(scratch->core_info.state); + u32 idx = 0; - u32 i = mmbit_sparse_iter_next(getRoleState(tctxt->state), - t->rolesWithStateCount, + u32 i = mmbit_sparse_iter_next(roles, t->rolesWithStateCount, ri->state, &idx, it, si_state); if (i == MMB_INVALID) { DEBUG_PRINTF("no more states in sparse iter are on\n"); diff --git a/src/rose/rose.h b/src/rose/rose.h index 22df20d61..c855795e4 100644 --- 
a/src/rose/rose.h +++ b/src/rose/rose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,7 @@ #include "util/multibit.h" // Initialise state space for engine use. -void roseInitState(const struct RoseEngine *t, u8 *state); +void roseInitState(const struct RoseEngine *t, char *state); void roseBlockEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch); @@ -119,11 +119,11 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, } /* assumes core_info in scratch has been init to point to data */ -void roseStreamExec(const struct RoseEngine *t, u8 *state, - struct hs_scratch *scratch, RoseCallback callback, - RoseCallbackSom som_callback, void *context); +void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, + RoseCallback callback, RoseCallbackSom som_callback, + void *context); -void roseEodExec(const struct RoseEngine *t, u8 *state, u64a offset, +void roseEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback, void *context); diff --git a/src/rose/runtime.h b/src/rose/runtime.h index d71c32d63..a8587538d 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -57,7 +57,7 @@ /** \brief Fetch runtime state ptr. */ static really_inline -struct RoseRuntimeState *getRuntimeState(u8 *state) { +struct RoseRuntimeState *getRuntimeState(char *state) { struct RoseRuntimeState *rs = (struct RoseRuntimeState *)(state); assert(ISALIGNED_N(rs, 8)); return rs; @@ -70,20 +70,20 @@ const void *getByOffset(const struct RoseEngine *t, u32 offset) { } static really_inline -void *getRoleState(u8 *state) { +void *getRoleState(char *state) { return state + sizeof(struct RoseRuntimeState); } /** \brief Fetch the active array for suffix nfas. */ static really_inline -u8 *getActiveLeafArray(const struct RoseEngine *t, u8 *state) { - return state + t->stateOffsets.activeLeafArray; +u8 *getActiveLeafArray(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.activeLeafArray); } /** \brief Fetch the active array for rose nfas. 
*/ static really_inline -u8 *getActiveLeftArray(const struct RoseEngine *t, u8 *state) { - return state + t->stateOffsets.activeLeftArray; +u8 *getActiveLeftArray(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.activeLeftArray); } static really_inline @@ -97,31 +97,32 @@ const u32 *getAnchoredMap(const struct RoseEngine *t) { } static really_inline -rose_group loadGroups(const struct RoseEngine *t, const u8 *state) { +rose_group loadGroups(const struct RoseEngine *t, const char *state) { return partial_load_u64a(state + t->stateOffsets.groups, t->stateOffsets.groups_size); } static really_inline -void storeGroups(const struct RoseEngine *t, u8 *state, rose_group groups) { +void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) { partial_store_u64a(state + t->stateOffsets.groups, groups, t->stateOffsets.groups_size); } static really_inline -u8 * getFloatingMatcherState(const struct RoseEngine *t, u8 *state) { - return state + t->stateOffsets.floatingMatcherState; +u8 *getFloatingMatcherState(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.floatingMatcherState); } static really_inline -u8 *getLeftfixLagTable(const struct RoseEngine *t, u8 *state) { - return state + t->stateOffsets.leftfixLagTable; +u8 *getLeftfixLagTable(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.leftfixLagTable); } static really_inline -const u8 *getLeftfixLagTableConst(const struct RoseEngine *t, const u8 *state) { - return state + t->stateOffsets.leftfixLagTable; +const u8 *getLeftfixLagTableConst(const struct RoseEngine *t, + const char *state) { + return (const u8 *)(state + t->stateOffsets.leftfixLagTable); } static rose_inline diff --git a/src/rose/stream.c b/src/rose/stream.c index 0bbab851d..981f00181 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,9 +43,7 @@ static rose_inline void runAnchoredTableStream(const struct RoseEngine *t, const void *atable, size_t alen, u64a offset, struct hs_scratch *scratch) { - char *state_base - = (char *)scratch->tctxt.state + t->stateOffsets.anchorState; - + char *state_base = scratch->core_info.state + t->stateOffsets.anchorState; const struct anchored_matcher_info *curr = atable; do { @@ -128,7 +126,7 @@ enum MiracleAction { }; static really_inline -enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, u8 *state, +enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, u32 qi, const struct LeftNfaInfo *left, const struct NFA *nfa) { @@ -177,7 +175,7 @@ enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, u8 *state, nfaQueueInitState(q->nfa, q); } else { if (miracle_loc > end_loc - t->historyRequired) { - u8 *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset; + char *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset; u64a offset = ci->buf_offset + miracle_loc; u8 key = offset ? 
getByteBefore(ci, miracle_loc) : 0; DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset); @@ -205,7 +203,7 @@ enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, u8 *state, static really_inline -char roseCatchUpLeftfix(const struct RoseEngine *t, u8 *state, +char roseCatchUpLeftfix(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, u32 qi, const struct LeftNfaInfo *left) { assert(!left->transient); // active roses only @@ -323,7 +321,7 @@ char roseCatchUpLeftfix(const struct RoseEngine *t, u8 *state, } static rose_inline -void roseCatchUpLeftfixes(const struct RoseEngine *t, u8 *state, +void roseCatchUpLeftfixes(const struct RoseEngine *t, char *state, struct hs_scratch *scratch) { if (!t->activeLeftIterOffset) { // No sparse iter, no non-transient roses. @@ -365,7 +363,7 @@ void roseCatchUpLeftfixes(const struct RoseEngine *t, u8 *state, // Saves out stream state for all our active suffix NFAs. static rose_inline -void roseSaveNfaStreamState(const struct RoseEngine *t, u8 *state, +void roseSaveNfaStreamState(const struct RoseEngine *t, char *state, struct hs_scratch *scratch) { struct mq *queues = scratch->queues; u8 *aa = getActiveLeafArray(t, state); @@ -393,7 +391,7 @@ void roseSaveNfaStreamState(const struct RoseEngine *t, u8 *state, } static rose_inline -void ensureStreamNeatAndTidy(const struct RoseEngine *t, u8 *state, +void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, size_t length, u64a offset, u8 delay_rb_status) { struct RoseContext *tctxt = &scratch->tctxt; @@ -425,12 +423,11 @@ void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable, assert(!can_stop_matching(scratch)); } -void roseStreamExec(const struct RoseEngine *t, u8 *state, - struct hs_scratch *scratch, RoseCallback callback, - RoseCallbackSom som_callback, void *ctx) { +void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, + RoseCallback callback, RoseCallbackSom som_callback, + void *ctx) { DEBUG_PRINTF("OH HAI\n"); assert(t); - assert(state); assert(scratch->core_info.hbuf); assert(scratch->core_info.buf); @@ -449,17 +446,16 @@ void roseStreamExec(const struct RoseEngine *t, u8 *state, return; } + char *state = scratch->core_info.state; struct RoseRuntimeState *rstate = getRuntimeState(state); struct RoseContext *tctxt = &scratch->tctxt; - tctxt->t = t; tctxt->mpv_inactive = 0; tctxt->groups = loadGroups(t, state); tctxt->lit_offset_adjust = offset + 1; // index after last byte tctxt->delayLastEndOffset = offset; tctxt->lastEndOffset = offset; tctxt->filledDelayedSlots = 0; - tctxt->state = state; tctxt->cb = callback; tctxt->cb_som = som_callback; tctxt->userCtx = ctx; diff --git a/src/runtime.c b/src/runtime.c index 27740af22..a33886393 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -670,11 +670,11 @@ void processReportList(const struct RoseEngine *rose, u32 base_offset, /** \brief Initialise SOM state. Used in both block and streaming mode. 
*/ static really_inline -void initSomState(const struct RoseEngine *rose, u8 *state) { +void initSomState(const struct RoseEngine *rose, char *state) { assert(rose && state); const u32 somCount = rose->somLocationCount; - mmbit_clear(state + rose->stateOffsets.somValid, somCount); - mmbit_clear(state + rose->stateOffsets.somWritable, somCount); + mmbit_clear((u8 *)state + rose->stateOffsets.somValid, somCount); + mmbit_clear((u8 *)state + rose->stateOffsets.somWritable, somCount); } static really_inline @@ -682,7 +682,7 @@ void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) { assert(rose); assert(scratch); - initSomState(rose, (u8 *)scratch->core_info.state); + initSomState(rose, scratch->core_info.state); DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len); @@ -697,7 +697,7 @@ void pureLiteralBlockExec(const struct RoseEngine *rose, assert(scratch); const struct HWLM *ftable = getFLiteralMatcher(rose); - initSomState(rose, (u8 *)scratch->core_info.state); + initSomState(rose, scratch->core_info.state); const u8 *buffer = scratch->core_info.buf; size_t length = scratch->core_info.len; DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl); @@ -736,7 +736,7 @@ void soleOutfixBlockExec(const struct RoseEngine *t, assert(t); assert(scratch); - initSomState(t, (u8 *)scratch->core_info.state); + initSomState(t, scratch->core_info.state); assert(t->outfixEndQueue == 1); assert(!t->amatcherOffset); assert(!t->ematcherOffset); @@ -954,7 +954,7 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose) { s->rose = rose; s->offset = 0; - u8 *state = (u8 *)getMultiState(s); + char *state = getMultiState(s); roseInitState(rose, state); @@ -1017,7 +1017,7 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { return; } - roseEodExec(rose, (u8 *)state, id->offset, scratch, selectAdaptor(rose), + roseEodExec(rose, id->offset, scratch, selectAdaptor(rose), selectSomAdaptor(rose), scratch); } @@ -1191,9 +1191,8 @@ void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { const struct RoseEngine *rose = stream_state->rose; assert(rose); - u8 *rose_state = (u8 *)state; - roseStreamExec(rose, rose_state, scratch, selectAdaptor(rose), - selectSomAdaptor(rose), scratch); + roseStreamExec(rose, scratch, selectAdaptor(rose), selectSomAdaptor(rose), + scratch); if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { @@ -1218,7 +1217,7 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, u8 *hwlm_stream_state; if (rose->floatingStreamState) { - hwlm_stream_state = getFloatingMatcherState(rose, (u8 *)state); + hwlm_stream_state = getFloatingMatcherState(rose, state); } else { hwlm_stream_state = NULL; } diff --git a/src/scratch.h b/src/scratch.h index 1faf60f70..f23ff5dcf 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -104,8 +104,6 @@ struct core_info { /** \brief Rose state information. 
*/ struct RoseContext { - const struct RoseEngine *t; - u8 *state; /**< base pointer to the full state */ u8 mpv_inactive; u64a groups; u64a lit_offset_adjust; /**< offset to add to matches coming from hwlm */ From e63fcec3c7b151c1d834431868e09a08a65f9ee4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 14 Jan 2016 08:52:18 +1100 Subject: [PATCH 045/218] Fix release build (unused var) --- src/runtime.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/runtime.c b/src/runtime.c index a33886393..8890e53f9 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1183,8 +1183,7 @@ void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { assert(stream_state); assert(scratch); - char *state = getMultiState(stream_state); - assert(!getBroken(state)); + assert(!getBroken(getMultiState(stream_state))); DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n", stream_state->offset, scratch->core_info.len); From d7c8ffc7fdafd16d92cc6b5696a32a540237d7e6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 14 Jan 2016 09:53:15 +1100 Subject: [PATCH 046/218] Use correct type for anchored matcher build --- src/rose/rose_build_anchored.cpp | 23 +++++++++++------------ src/rose/rose_build_anchored.h | 10 ++++++---- src/rose/rose_build_bytecode.cpp | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 675f8c68c..ef0026d43 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -217,13 +217,12 @@ void populate_holder(const simple_anchored_info &sai, const set &exit_ids, h[v].reports.insert(exit_ids.begin(), exit_ids.end()); } -u32 anchoredStateSize(const void *atable) { +u32 anchoredStateSize(const anchored_matcher_info *atable) { if (!atable) { return 0; } - const struct anchored_matcher_info *curr - = (const anchored_matcher_info *)atable; + const struct anchored_matcher_info *curr = atable; // Walk the list until we find the last element; total state size will be // that engine's state offset plus its state requirement. 
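[Editor's sketch] The walk described in the comment above relies on the anchored_matcher_info headers being laid out back to back in one allocation, with next_offset holding the relative byte offset of the following matcher and zero marking the last one (the fields next_offset, state_offset and scratchStateSize all appear in nearby hunks; the assumption that each matcher's NFA sits immediately after its header is mine, not taken from this patch). A minimal sketch of that traversal under those assumptions:

/* Sketch only, not verbatim source: next_offset == 0 terminates the
 * chain; the NFA is assumed to follow its matcher header directly. */
static u32 anchored_state_size_sketch(const struct anchored_matcher_info *curr) {
    while (curr->next_offset) {
        /* hop to the next DFA in the table */
        curr = (const struct anchored_matcher_info *)
                   ((const char *)curr + curr->next_offset);
    }
    /* total state: the last engine's state offset plus its own need */
    const struct NFA *nfa =
        (const struct NFA *)((const char *)curr + sizeof(*curr));
    return curr->state_offset + nfa->scratchStateSize;
}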
@@ -812,21 +811,21 @@ size_t buildNfas(vector> &anchored_dfas, return total_size; } -aligned_unique_ptr buildAnchoredAutomataMatcher(RoseBuildImpl &tbi, - size_t *asize) { - const CompileContext &cc = tbi.cc; - remapAnchoredReports(tbi); +aligned_unique_ptr +buildAnchoredAutomataMatcher(RoseBuildImpl &build, size_t *asize) { + const CompileContext &cc = build.cc; + remapAnchoredReports(build); - if (tbi.anchored_nfas.empty() && tbi.anchored_simple.empty()) { + if (build.anchored_nfas.empty() && build.anchored_simple.empty()) { DEBUG_PRINTF("empty\n"); *asize = 0; return nullptr; } vector> anchored_dfas; - getAnchoredDfas(tbi, &anchored_dfas); + getAnchoredDfas(build, &anchored_dfas); - mergeAnchoredDfas(anchored_dfas, tbi); + mergeAnchoredDfas(anchored_dfas, build); vector> nfas; vector start_offset; // start offset for each dfa (dots removed) @@ -837,7 +836,7 @@ aligned_unique_ptr buildAnchoredAutomataMatcher(RoseBuildImpl &tbi, } *asize = total_size; - aligned_unique_ptr atable = aligned_zmalloc_unique(total_size); + auto atable = aligned_zmalloc_unique(total_size); char *curr = (char *)atable.get(); u32 state_offset = 0; diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index 1ee8be74f..3ccd8cfb7 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +38,7 @@ #include #include +struct anchored_matcher_info; struct RoseEngine; namespace ue2 { @@ -46,9 +47,10 @@ class NGHolder; class RoseBuildImpl; struct Grey; -aligned_unique_ptr buildAnchoredAutomataMatcher(RoseBuildImpl &tbi, - size_t *asize); -u32 anchoredStateSize(const void *atable); +aligned_unique_ptr +buildAnchoredAutomataMatcher(RoseBuildImpl &build, size_t *asize); + +u32 anchoredStateSize(const anchored_matcher_info *atable); bool anchoredIsMulti(const RoseEngine &engine); #define ANCHORED_FAIL 0 #define ANCHORED_SUCCESS 1 diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 9444005da..910f320bb 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4028,7 +4028,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { size_t floatingStreamStateRequired = 0; size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. - aligned_unique_ptr atable = + aligned_unique_ptr atable = buildAnchoredAutomataMatcher(*this, &asize); aligned_unique_ptr ftable = buildFloatingMatcher( *this, &fsize, &historyRequired, &floatingStreamStateRequired); From 69682ed2630017ba4996938682b649e5ca9a72a0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 14 Jan 2016 10:24:19 +1100 Subject: [PATCH 047/218] Account for multi-dfa case with ANCHORED_DELAY Specifically, we must set build_context::floatingMinLiteralMatchOffset to 1 when the anchored table contains multiple DFAs, as they can produce unordered matches. This check was already being done, but too late to affect the generation of ANCHORED_DELAY instructions. 
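[Editor's sketch] For intuition on the unordered-match claim: the anchored matchers in the table are run one after another over the same block (see the do/while loop over anchored_matcher_info in runAnchoredTableStream earlier in this series), so callbacks arrive grouped by DFA rather than sorted by end offset. A toy model in plain C -- not the Hyperscan API; the pattern strings and offsets are invented for illustration:

#include <stdio.h>

struct toy_dfa {
    const char *pattern;    /* illustrative only */
    unsigned report_offset; /* end offset this DFA would report */
};

int main(void) {
    /* hypothetical anchored table holding two DFAs, run back to back */
    struct toy_dfa table[2] = { { "/^foobar/", 6 }, { "/^foo/", 3 } };
    unsigned last = 0;
    for (int i = 0; i < 2; i++) {
        printf("match @%u from %s\n", table[i].report_offset,
               table[i].pattern);
        if (table[i].report_offset < last) {
            printf("  -> offset went backwards: matches are unordered\n");
        }
        last = table[i].report_offset;
    }
    /* prints the match at 6 before the match at 3; with a single DFA
     * this cannot happen, which is why the fall-back to offset 1 is
     * only needed in the multi-DFA case */
    return 0;
}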
--- src/rose/rose_build_anchored.cpp | 10 +++------- src/rose/rose_build_anchored.h | 7 ++++++- src/rose/rose_build_bytecode.cpp | 19 ++++++++++++------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index ef0026d43..d7c255fc5 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -235,13 +235,9 @@ u32 anchoredStateSize(const anchored_matcher_info *atable) { return curr->state_offset + nfa->scratchStateSize; } -bool anchoredIsMulti(const RoseEngine &engine) { - const struct anchored_matcher_info *curr - = (const anchored_matcher_info *)getALiteralMatcher(&engine); - - return curr && curr->next_offset; - } - +bool anchoredIsMulti(const anchored_matcher_info *atable) { + return atable && atable->next_offset; +} namespace { diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index 3ccd8cfb7..a212c84dc 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -51,7 +51,12 @@ aligned_unique_ptr buildAnchoredAutomataMatcher(RoseBuildImpl &build, size_t *asize); u32 anchoredStateSize(const anchored_matcher_info *atable); -bool anchoredIsMulti(const RoseEngine &engine); + +/** + * \brief True if there is an anchored matcher and it consists of multiple + * DFAs. + */ +bool anchoredIsMulti(const anchored_matcher_info *atable); #define ANCHORED_FAIL 0 #define ANCHORED_SUCCESS 1 diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 910f320bb..2fa710f40 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2243,7 +2243,15 @@ void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &gre } static -u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build) { +u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, + const anchored_matcher_info *atable) { + if (anchoredIsMulti(atable)) { + DEBUG_PRINTF("multiple anchored dfas\n"); + /* We must regard matches from other anchored tables as unordered, as + * we do for floating matches. */ + return 1; + } + const RoseGraph &g = build.g; u32 minWidth = ROSE_BOUND_INF; for (auto v : vertices_range(g)) { @@ -4036,7 +4044,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { aligned_unique_ptr sbtable = buildSmallBlockMatcher(*this, &sbsize); build_context bc; - bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this); + bc.floatingMinLiteralMatchOffset = + findMinFloatingLiteralMatch(*this, atable.get()); // Build NFAs set no_retrigger_queues; @@ -4363,12 +4372,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); - if (anchoredIsMulti(*engine)) { - DEBUG_PRINTF("multiple anchored dfas\n"); + if (anchoredIsMulti(atable.get())) { engine->maxSafeAnchoredDROffset = 1; - engine->floatingMinLiteralMatchOffset = 1; /* regard matches from other - anchored tables as - floating as unordered. 
*/ } else { /* overly conservative, really need the min offset of non dr anchored matches */ From 1c2fca88407189fb10e0a49f16e6c11a3eac678a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 14 Jan 2016 10:38:24 +1100 Subject: [PATCH 048/218] rose_build_anchored: take ref, not pointer --- src/rose/rose_build_anchored.cpp | 12 ++++-------- src/rose/rose_build_anchored.h | 4 ++-- src/rose/rose_build_bytecode.cpp | 6 +++--- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index d7c255fc5..96393ba17 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -217,12 +217,8 @@ void populate_holder(const simple_anchored_info &sai, const set &exit_ids, h[v].reports.insert(exit_ids.begin(), exit_ids.end()); } -u32 anchoredStateSize(const anchored_matcher_info *atable) { - if (!atable) { - return 0; - } - - const struct anchored_matcher_info *curr = atable; +u32 anchoredStateSize(const anchored_matcher_info &atable) { + const struct anchored_matcher_info *curr = &atable; // Walk the list until we find the last element; total state size will be // that engine's state offset plus its state requirement. @@ -235,8 +231,8 @@ u32 anchoredStateSize(const anchored_matcher_info *atable) { return curr->state_offset + nfa->scratchStateSize; } -bool anchoredIsMulti(const anchored_matcher_info *atable) { - return atable && atable->next_offset; +bool anchoredIsMulti(const anchored_matcher_info &atable) { + return atable.next_offset; } namespace { diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index a212c84dc..d399907b0 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -50,13 +50,13 @@ struct Grey; aligned_unique_ptr buildAnchoredAutomataMatcher(RoseBuildImpl &build, size_t *asize); -u32 anchoredStateSize(const anchored_matcher_info *atable); +u32 anchoredStateSize(const anchored_matcher_info &atable); /** * \brief True if there is an anchored matcher and it consists of multiple * DFAs. */ -bool anchoredIsMulti(const anchored_matcher_info *atable); +bool anchoredIsMulti(const anchored_matcher_info &atable); #define ANCHORED_FAIL 0 #define ANCHORED_SUCCESS 1 diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 2fa710f40..45af3bb73 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2245,7 +2245,7 @@ void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &gre static u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, const anchored_matcher_info *atable) { - if (anchoredIsMulti(atable)) { + if (atable && anchoredIsMulti(*atable)) { DEBUG_PRINTF("multiple anchored dfas\n"); /* We must regard matches from other anchored tables as unordered, as * we do for floating matches. */ @@ -4199,7 +4199,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build engine header and copy tables into place. - u32 anchorStateSize = anchoredStateSize(atable.get()); + u32 anchorStateSize = atable ? 
anchoredStateSize(*atable) : 0; DEBUG_PRINTF("rose history required %zu\n", historyRequired); assert(!cc.streaming || historyRequired <= cc.grey.maxHistoryAvailable); @@ -4372,7 +4372,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); - if (anchoredIsMulti(atable.get())) { + if (atable && anchoredIsMulti(*atable)) { engine->maxSafeAnchoredDROffset = 1; } else { /* overly conservative, really need the min offset of non dr anchored From de61b32e98ce272ff7f8d3b1aa0ef90515c925ad Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 7 Jan 2016 11:56:57 +1100 Subject: [PATCH 049/218] Use fatbit for anch log, delay slots in scratch Since these structures are in scratch, they do not have to be as small as possible and we can use fatbit instead of multibit to improve performance. --- src/rose/catchup.c | 12 ++++----- src/rose/match.c | 43 +++++++++++++++----------------- src/rose/match.h | 9 ++++--- src/rose/program_runtime.h | 8 +++--- src/rose/rose_build_bytecode.cpp | 1 - src/rose/rose_dump.cpp | 1 - src/rose/rose_internal.h | 1 - src/scratch.c | 38 +++++++++++++++++----------- src/scratch.h | 19 +++++++------- src/util/fatbit.h | 6 ++++- 10 files changed, 72 insertions(+), 66 deletions(-) diff --git a/src/rose/catchup.c b/src/rose/catchup.c index d1ef41ff1..6893df0eb 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -105,13 +105,13 @@ void nextAnchoredMatch(const struct RoseEngine *t, struct RoseContext *tctxt, assert(tctxt->curr_anchored_loc != MMB_INVALID); struct hs_scratch *scratch = tctxtToScratch(tctxt); - u8 **anchoredRows = getAnchoredLog(scratch); + struct fatbit **anchoredRows = getAnchoredLog(scratch); u32 region_width = t->anchoredMatches; - u8 *curr_row = anchoredRows[tctxt->curr_anchored_loc]; + struct fatbit *curr_row = anchoredRows[tctxt->curr_anchored_loc]; - tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width, - tctxt->curr_row_offset); + tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width, + tctxt->curr_row_offset); DEBUG_PRINTF("next %u [idx = %u] @%llu\n", *reportId, tctxt->curr_row_offset, *end); if (tctxt->curr_row_offset != MMB_INVALID) { @@ -132,8 +132,8 @@ void nextAnchoredMatch(const struct RoseEngine *t, struct RoseContext *tctxt, assert(tctxt->curr_anchored_loc < scratch->anchored_region_len); curr_row = anchoredRows[tctxt->curr_anchored_loc]; - tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width, - MMB_INVALID); + tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width, + MMB_INVALID); assert(tctxt->curr_row_offset != MMB_INVALID); *end = tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1; diff --git a/src/rose/match.c b/src/rose/match.c index f614423b7..6397b90ee 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -125,7 +125,7 @@ void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId, u64a end) { struct hs_scratch *scratch = tctxtToScratch(tctxt); const struct RoseEngine *t = scratch->core_info.rose; - u8 **anchoredRows = getAnchoredLog(scratch); + struct fatbit **anchoredRows = getAnchoredLog(scratch); DEBUG_PRINTF("record %u @ %llu\n", reportId, end); assert(end - t->maxSafeAnchoredDROffset >= 1); @@ -135,13 +135,13 @@ void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId, if (!bf64_set(&scratch->am_log_sum, adj_end)) { // first time, clear row - mmbit_clear(anchoredRows[adj_end], t->anchoredMatches); + fatbit_clear(anchoredRows[adj_end]); } u32 idx = 
getAnchoredInverseMap(t)[reportId]; DEBUG_PRINTF("record %u @ %llu index %u\n", reportId, end, idx); assert(idx < t->anchoredMatches); - mmbit_set(anchoredRows[adj_end], t->anchoredMatches, idx); + fatbit_set(anchoredRows[adj_end], t->anchoredMatches, idx); } static rose_inline @@ -150,21 +150,21 @@ void recordAnchoredLiteralMatch(struct RoseContext *tctxt, u32 literal_id, assert(end); struct hs_scratch *scratch = tctxtToScratch(tctxt); const struct RoseEngine *t = scratch->core_info.rose; - u8 **anchoredLiteralRows = getAnchoredLiteralLog(scratch); + struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); DEBUG_PRINTF("record %u @ %llu\n", literal_id, end); if (!bf64_set(&scratch->al_log_sum, end - 1)) { // first time, clear row DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count); - mmbit_clear(anchoredLiteralRows[end - 1], t->anchored_count); + fatbit_clear(anchoredLiteralRows[end - 1]); } u32 rel_idx = literal_id - t->anchored_base_id; DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx, t->anchored_count); assert(rel_idx < t->anchored_count); - mmbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); + fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); } hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, @@ -447,11 +447,11 @@ hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end, static rose_inline hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt, - const u8 *delaySlotBase, size_t delaySlotSize, - u32 vicIndex, u64a offset) { + struct fatbit **delaySlots, u32 vicIndex, + u64a offset) { /* assert(!tctxt->in_anchored); */ assert(vicIndex < DELAY_SLOT_COUNT); - const u8 *vicSlot = delaySlotBase + delaySlotSize * vicIndex; + const struct fatbit *vicSlot = delaySlots[vicIndex]; u32 delay_count = t->delay_count; if (offset < t->floatingMinLiteralMatchOffset) { @@ -463,8 +463,8 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt, roseFlushLastByteHistory(t, scratch->core_info.state, offset, tctxt); tctxt->lastEndOffset = offset; - for (u32 it = mmbit_iterate(vicSlot, delay_count, MMB_INVALID); - it != MMB_INVALID; it = mmbit_iterate(vicSlot, delay_count, it)) { + for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID); + it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) { u32 literal_id = t->delay_base_id + it; UNUSED rose_group old_groups = tctxt->groups; @@ -490,12 +490,13 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt, static really_inline hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, struct RoseContext *tctxt, u32 curr_loc) { - u8 *curr_row = getAnchoredLiteralLog(tctxtToScratch(tctxt))[curr_loc - 1]; + struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1]; u32 region_width = t->anchored_count; DEBUG_PRINTF("report matches at curr loc\n"); - for (u32 it = mmbit_iterate(curr_row, region_width, MMB_INVALID); - it != MMB_INVALID; it = mmbit_iterate(curr_row, region_width, it)) { + for (u32 it = fatbit_iterate(curr_row, region_width, MMB_INVALID); + it != MMB_INVALID; it = fatbit_iterate(curr_row, region_width, it)) { DEBUG_PRINTF("it = %u/%u\n", it, region_width); u32 literal_id = t->anchored_base_id + it; @@ -519,7 +520,6 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, } /* clear row; does not invalidate iteration */ - struct hs_scratch *scratch = 
tctxtToScratch(tctxt); bf64_unset(&scratch->al_log_sum, curr_loc - 1); return HWLM_CONTINUE_MATCHING; @@ -566,7 +566,7 @@ hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t, static really_inline hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct RoseContext *tctxt, u32 *anchored_it, u64a lastEnd, u64a victimDelaySlots, - u8 *delaySlotBase, size_t delaySlotSize) { + struct fatbit **delaySlots) { /* assert (!tctxt->in_anchored); */ while (victimDelaySlots) { @@ -579,9 +579,8 @@ hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct RoseContext *tctxt, return HWLM_TERMINATE_MATCHING; } - if (playDelaySlot(t, tctxt, delaySlotBase, delaySlotSize, - vic % DELAY_SLOT_COUNT, vicOffset) - == HWLM_TERMINATE_MATCHING) { + if (playDelaySlot(t, tctxt, delaySlots, vic % DELAY_SLOT_COUNT, + vicOffset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } @@ -609,8 +608,7 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) { } { - u8 *delaySlotBase = getDelaySlots(scratch); - size_t delaySlotSize = t->delay_slot_size; + struct fatbit **delaySlots = getDelaySlots(tctxtToScratch(tctxt)); u32 lastIndex = lastEnd & DELAY_MASK; u32 currIndex = currEnd & DELAY_MASK; @@ -664,8 +662,7 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) { } if (playVictims(t, tctxt, &anchored_it, lastEnd, victimDelaySlots, - delaySlotBase, delaySlotSize) - == HWLM_TERMINATE_MATCHING) { + delaySlots) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } diff --git a/src/rose/match.h b/src/rose/match.h index f3b8fe73c..2b6dfb5d1 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -40,6 +40,7 @@ #include "nfa/nfa_api_util.h" #include "som/som_runtime.h" #include "util/bitutils.h" +#include "util/fatbit.h" #include "util/internal_report.h" #include "util/multibit.h" @@ -60,16 +61,16 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx); static rose_inline void resetAnchoredLog(const struct RoseEngine *t, struct hs_scratch *scratch) { - u8 **anchoredRows = getAnchoredLog(scratch); + struct fatbit **anchoredRows = getAnchoredLog(scratch); u32 region_width = t->anchoredMatches; struct RoseContext *tctxt = &scratch->tctxt; tctxt->curr_anchored_loc = bf64_iterate(scratch->am_log_sum, MMB_INVALID); if (tctxt->curr_anchored_loc != MMB_INVALID) { assert(tctxt->curr_anchored_loc < scratch->anchored_region_len); - u8 *curr_row = anchoredRows[tctxt->curr_anchored_loc]; - tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width, - MMB_INVALID); + struct fatbit *curr_row = anchoredRows[tctxt->curr_anchored_loc]; + tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width, + MMB_INVALID); assert(tctxt->curr_row_offset != MMB_INVALID); } DEBUG_PRINTF("AL reset --> %u, %u\n", tctxt->curr_anchored_loc, diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index e8e60c7ff..309fee5ba 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -127,16 +127,16 @@ void rosePushDelayedMatch(const struct RoseEngine *t, u32 delay, } const u32 delay_count = t->delay_count; - u8 *slot = getDelaySlots(tctxtToScratch(tctxt)) + - (t->delay_slot_size * slot_index); + struct fatbit **delaySlots = getDelaySlots(tctxtToScratch(tctxt)); + struct fatbit *slot = delaySlots[slot_index]; DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index); if (!(tctxt->filledDelayedSlots & (1U << slot_index))) { tctxt->filledDelayedSlots |= 1U << slot_index; - mmbit_clear(slot, delay_count); + fatbit_clear(slot); } - 
mmbit_set(slot, delay_count, delay_index); + fatbit_set(slot, delay_count, delay_index); } static rose_inline diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 45af3bb73..c640f0912 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4311,7 +4311,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id); engine->delay_count = delay_count; - engine->delay_slot_size = mmbit_size(delay_count); engine->delay_base_id = delay_base_id; engine->anchored_base_id = anchored_base_id; engine->anchored_count = delay_base_id - anchored_base_id; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index cd70c734c..25ec7bae6 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -884,7 +884,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, size); DUMP_U32(t, anchoredMatches); DUMP_U32(t, delay_count); - DUMP_U32(t, delay_slot_size); DUMP_U32(t, delay_base_id); DUMP_U32(t, anchored_count); DUMP_U32(t, anchored_base_id); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index c90256008..a1f91cd39 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -447,7 +447,6 @@ struct RoseEngine { u32 size; // (bytes) u32 anchoredMatches; /* number of anchored roles generating matches */ u32 delay_count; /* number of delayed literal ids. */ - u32 delay_slot_size; /* size of delay slot mmbit. */ u32 delay_base_id; /* literal id of the first delayed literal. * delayed literal ids are contiguous */ u32 anchored_count; /* number of anchored literal ids */ diff --git a/src/scratch.c b/src/scratch.c index 30241ab49..eff2289af 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -74,14 +74,16 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { assert(anchored_literal_region_len < 8 * sizeof(s->am_log_sum)); size_t anchored_region_size = anchored_region_len - * (mmbit_size(anchored_region_width) + sizeof(u8 *)); + * (fatbit_size(anchored_region_width) + sizeof(struct fatbit *)); anchored_region_size = ROUNDUP_N(anchored_region_size, 8); size_t anchored_literal_region_size = anchored_literal_region_len - * (mmbit_size(anchored_literal_region_width) + sizeof(u8 *)); + * (fatbit_size(anchored_literal_region_width) + sizeof(struct fatbit *)); anchored_literal_region_size = ROUNDUP_N(anchored_literal_region_size, 8); - size_t delay_size = mmbit_size(proto->delay_count) * DELAY_SLOT_COUNT; + size_t delay_region_size = DELAY_SLOT_COUNT * + (fatbit_size(proto->delay_count) + sizeof(struct fatbit *)); + delay_region_size = ROUNDUP_N(delay_region_size, 8); size_t nfa_context_size = 2 * sizeof(struct NFAContext512) + 127; @@ -96,7 +98,8 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { + 2 * fatbit_size(deduperCount) /* ditto som logs */ + 2 * sizeof(u64a) * deduperCount /* start offsets for som */ + anchored_region_size - + anchored_literal_region_size + qmpq_size + delay_size + + anchored_literal_region_size + qmpq_size + + delay_region_size + som_store_size + som_now_size + som_attempted_size @@ -140,23 +143,28 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { 
s->som_attempted_store = (u64a *)current; current += som_attempted_store_size; - s->delay_slots = (u8 *)current; - current += delay_size; + current = ROUNDUP_PTR(current, 8); + s->delay_slots = (struct fatbit **)current; + current += sizeof(struct fatbit *) * DELAY_SLOT_COUNT; + for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) { + s->delay_slots[i] = (struct fatbit *)current; + current += fatbit_size(proto->delay_count); + } current = ROUNDUP_PTR(current, 8); - s->am_log = (u8 **)current; - current += sizeof(u8 *) * anchored_region_len; + s->am_log = (struct fatbit **)current; + current += sizeof(struct fatbit *) * anchored_region_len; for (u32 i = 0; i < anchored_region_len; i++) { - s->am_log[i] = (u8 *)current; - current += mmbit_size(anchored_region_width); + s->am_log[i] = (struct fatbit *)current; + current += fatbit_size(anchored_region_width); } current = ROUNDUP_PTR(current, 8); - s->al_log = (u8 **)current; - current += sizeof(u8 *) * anchored_literal_region_len; + s->al_log = (struct fatbit **)current; + current += sizeof(struct fatbit *) * anchored_literal_region_len; for (u32 i = 0; i < anchored_literal_region_len; i++) { - s->al_log[i] = (u8 *)current; - current += mmbit_size(anchored_literal_region_width); + s->al_log[i] = (struct fatbit *)current; + current += fatbit_size(anchored_literal_region_width); } current = ROUNDUP_PTR(current, 8); diff --git a/src/scratch.h b/src/scratch.h index f23ff5dcf..fa112a568 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -37,7 +37,6 @@ #define SCRATCH_H_DA6D4FC06FF410 #include "ue2common.h" -#include "util/multibit_internal.h" #include "rose/rose_types.h" #ifdef __cplusplus @@ -133,7 +132,7 @@ struct RoseContext { struct match_deduper { struct fatbit *log[2]; /**< even, odd logs */ - struct fatbit *som_log[2]; /**< even, odd mmbit logs for som */ + struct fatbit *som_log[2]; /**< even, odd fatbit logs for som */ u64a *som_start_log[2]; /**< even, odd start offset logs for som */ u32 log_size; u64a current_report_offset; @@ -162,9 +161,9 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { struct mq *queues; struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid * & active */ - u8 *delay_slots; - u8 **am_log; - u8 **al_log; + struct fatbit **delay_slots; + struct fatbit **am_log; + struct fatbit **al_log; u64a am_log_sum; u64a al_log_sum; struct catchup_pq catchup_pq; @@ -178,7 +177,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 scratchSize; u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE]; u32 handledKeyCount; - struct fatbit *handled_roles; /**< mmbit of ROLES (not states) already + struct fatbit *handled_roles; /**< fatbit of ROLES (not states) already * handled by this literal */ u64a *som_store; /**< array of som locations */ u64a *som_attempted_store; /**< array of som locations for fail stores */ @@ -198,18 +197,18 @@ struct hs_scratch *tctxtToScratch(struct RoseContext *tctxt) { } static really_inline -u8 **getAnchoredLog(struct hs_scratch *scratch) { /* array of mmbit ptr */ +struct fatbit **getAnchoredLog(struct hs_scratch *scratch) { return scratch->am_log; } -/* array of mmbit ptr; TODO: why not an array of mmbits? */ +/* array of fatbit ptr; TODO: why not an array of fatbits? 
*/ static really_inline -u8 **getAnchoredLiteralLog(struct hs_scratch *scratch) { +struct fatbit **getAnchoredLiteralLog(struct hs_scratch *scratch) { return scratch->al_log; } static really_inline -u8 *getDelaySlots(struct hs_scratch *scratch) { +struct fatbit **getDelaySlots(struct hs_scratch *scratch) { return scratch->delay_slots; } diff --git a/src/util/fatbit.h b/src/util/fatbit.h index cf9062695..ad6076386 100644 --- a/src/util/fatbit.h +++ b/src/util/fatbit.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,21 +58,25 @@ void fatbit_clear(struct fatbit *bits) { static really_inline char fatbit_set(struct fatbit *bits, u32 total_bits, u32 key) { + assert(ISALIGNED(bits)); return mmbit_set(bits->fb_int.raw, total_bits, key); } static really_inline void fatbit_unset(struct fatbit *bits, u32 total_bits, u32 key) { + assert(ISALIGNED(bits)); mmbit_unset(bits->fb_int.raw, total_bits, key); } static really_inline char fatbit_isset(const struct fatbit *bits, u32 total_bits, u32 key) { + assert(ISALIGNED(bits)); return mmbit_isset(bits->fb_int.raw, total_bits, key); } static really_inline u32 fatbit_iterate(const struct fatbit *bits, u32 total_bits, u32 it_in) { + assert(ISALIGNED(bits)); /* TODO: iterate_flat could be specialised as we don't have to worry about * partial blocks. */ return mmbit_iterate(bits->fb_int.raw, total_bits, it_in); From 755e6700c1e901126db973b6dc471d795331f275 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 14 Jan 2016 14:48:22 +1100 Subject: [PATCH 050/218] scratch: correctly align fatbit arrays This fixes an assertion failure on 32-bit targets. --- src/scratch.c | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/src/scratch.c b/src/scratch.c index eff2289af..35d09bbe2 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -46,6 +46,27 @@ #include "util/fatbit.h" #include "util/multibit.h" +/** + * Determine the space required for a correctly aligned array of fatbit + * structure, laid out as: + * + * - an array of num_entries pointers, each to a fatbit. + * - an array of fatbit structures, each of size fatbit_size(num_keys). + */ +static +size_t fatbit_array_size(u32 num_entries, u32 num_keys) { + size_t len = 0; + + // Array of pointers to each fatbit entry. + len += sizeof(struct fatbit *) * num_entries; + + // Fatbit entries themselves. + len = ROUNDUP_N(len, alignof(struct fatbit)); + len += (size_t)fatbit_size(num_keys) * num_entries; + + return ROUNDUP_N(len, 8); // Round up for potential padding. +} + /** Used by hs_alloc_scratch and hs_clone_scratch to allocate a complete * scratch region from a prototype structure. 
*/ static @@ -73,17 +94,12 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { assert(anchored_region_len < 8 * sizeof(s->am_log_sum)); assert(anchored_literal_region_len < 8 * sizeof(s->am_log_sum)); - size_t anchored_region_size = anchored_region_len - * (fatbit_size(anchored_region_width) + sizeof(struct fatbit *)); - anchored_region_size = ROUNDUP_N(anchored_region_size, 8); - - size_t anchored_literal_region_size = anchored_literal_region_len - * (fatbit_size(anchored_literal_region_width) + sizeof(struct fatbit *)); - anchored_literal_region_size = ROUNDUP_N(anchored_literal_region_size, 8); - - size_t delay_region_size = DELAY_SLOT_COUNT * - (fatbit_size(proto->delay_count) + sizeof(struct fatbit *)); - delay_region_size = ROUNDUP_N(delay_region_size, 8); + size_t anchored_region_size = + fatbit_array_size(anchored_region_len, anchored_region_width); + size_t anchored_literal_region_size = fatbit_array_size( + anchored_literal_region_len, anchored_literal_region_width); + size_t delay_region_size = + fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_count); size_t nfa_context_size = 2 * sizeof(struct NFAContext512) + 127; @@ -143,27 +159,33 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { s->som_attempted_store = (u64a *)current; current += som_attempted_store_size; - current = ROUNDUP_PTR(current, 8); + current = ROUNDUP_PTR(current, alignof(struct fatbit *)); s->delay_slots = (struct fatbit **)current; current += sizeof(struct fatbit *) * DELAY_SLOT_COUNT; + current = ROUNDUP_PTR(current, alignof(struct fatbit)); for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) { s->delay_slots[i] = (struct fatbit *)current; + assert(ISALIGNED(s->delay_slots[i])); current += fatbit_size(proto->delay_count); } - current = ROUNDUP_PTR(current, 8); + current = ROUNDUP_PTR(current, alignof(struct fatbit *)); s->am_log = (struct fatbit **)current; current += sizeof(struct fatbit *) * anchored_region_len; + current = ROUNDUP_PTR(current, alignof(struct fatbit)); for (u32 i = 0; i < anchored_region_len; i++) { s->am_log[i] = (struct fatbit *)current; + assert(ISALIGNED(s->am_log[i])); current += fatbit_size(anchored_region_width); } - current = ROUNDUP_PTR(current, 8); + current = ROUNDUP_PTR(current, alignof(struct fatbit *)); s->al_log = (struct fatbit **)current; current += sizeof(struct fatbit *) * anchored_literal_region_len; + current = ROUNDUP_PTR(current, alignof(struct fatbit)); for (u32 i = 0; i < anchored_literal_region_len; i++) { s->al_log[i] = (struct fatbit *)current; + assert(ISALIGNED(s->al_log[i])); current += fatbit_size(anchored_literal_region_width); } From 843ca0e7ccb651f4bfc45abcb81a95c34b920256 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Tue, 12 Jan 2016 16:21:20 +0000 Subject: [PATCH 051/218] Don't look for accel friends for multibyte acceleration --- src/nfa/limex_compile.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index d3e1a8ee0..dc372860f 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -662,15 +662,18 @@ void doAccelCommon(NGHolder &g, state_set.reset(); state_set.set(state_id); + bool is_multi = false; auto p_it = accel->precalc.find(state_set); if 
(p_it != accel->precalc.end()) { const precalcAccel &pa = p_it->second; offset = max(pa.double_offset, pa.single_offset); + is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE; assert(offset <= MAX_ACCEL_DEPTH); } accel->accelerable.insert(v); - findAccelFriends(g, v, br_cyclic, offset, &accel->friends[v]); + if (!is_multi) + findAccelFriends(g, v, br_cyclic, offset, &accel->friends[v]); } } From 621dfbebb7f53f6f350e3d0920015cf0937a3890 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 11 Jan 2016 15:19:09 +1100 Subject: [PATCH 052/218] nfaCheckFinalState: define return value Make nfaCheckFinalState return MO_HALT_MATCHING when the user instructs us (via the callback return value) to halt matching. In the caller, check this value and stop matching if told. --- src/nfa/gough.c | 17 +++++++++-------- src/nfa/mcclellan.c | 17 +++++++++-------- src/nfa/nfa_api.h | 5 ++++- src/rose/eod.c | 17 +++++++++++++---- 4 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/nfa/gough.c b/src/nfa/gough.c index 42a252e5e..c52bca065 100644 --- a/src/nfa/gough.c +++ b/src/nfa/gough.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1049,15 +1049,16 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, } static -void goughCheckEOD(const struct NFA *nfa, u16 s, +char goughCheckEOD(const struct NFA *nfa, u16 s, const struct gough_som_info *som, u64a offset, SomNfaCallback cb, void *ctxt) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); - if (aux->accept_eod) { - doReports(cb, ctxt, m, som, s, offset, 1, NULL, NULL, NULL); + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; } + return doReports(cb, ctxt, m, som, s, offset, 1, NULL, NULL, NULL); } char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, @@ -1065,8 +1066,8 @@ char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, UNUSED NfaCallback callback, SomNfaCallback som_callback, void *context) { const struct gough_som_info *som = getSomInfoConst(state); - goughCheckEOD(nfa, *(const u8 *)state, som, offset, som_callback, context); - return 0; + return goughCheckEOD(nfa, *(const u8 *)state, som, offset, som_callback, + context); } char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, @@ -1075,8 +1076,8 @@ char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, SomNfaCallback som_callback, void *context) { assert(ISALIGNED_N(state, 8)); const struct gough_som_info *som = getSomInfoConst(state); - goughCheckEOD(nfa, *(const u16 *)state, som, offset, som_callback, context); - return 0; + return goughCheckEOD(nfa, *(const u16 *)state, som, offset, som_callback, + context); } char nfaExecGough8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 694196e6a..ef670a930 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -445,14 +445,15 @@ char mcclellanExec8_i_ni(const struct mcclellan *m, u8 *state, const u8 *buf, } static really_inline -void 
mcclellanCheckEOD(const struct NFA *nfa, u16 s, u64a offset, +char mcclellanCheckEOD(const struct NFA *nfa, u16 s, u64a offset, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); - if (aux->accept_eod) { - doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; } + return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); } static really_inline @@ -1053,8 +1054,8 @@ char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, UNUSED const char *streamState, u64a offset, NfaCallback callback, UNUSED SomNfaCallback som_cb, void *context) { - mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback, context); - return 0; + return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback, + context); } char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, @@ -1062,8 +1063,8 @@ char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, u64a offset, NfaCallback callback, UNUSED SomNfaCallback som_cb, void *context) { assert(ISALIGNED_N(state, 2)); - mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback, context); - return 0; + return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback, + context); } char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { diff --git a/src/nfa/nfa_api.h b/src/nfa/nfa_api.h index 4e31a6254..84a5417b5 100644 --- a/src/nfa/nfa_api.h +++ b/src/nfa/nfa_api.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -236,6 +236,9 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, * @param callback the callback to call for each match raised * @param som_cb the callback to call for each match raised (Haig) * @param context context pointer passed to each callback + * + * @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise + * @ref MO_CONTINUE_MATCHING. */ char nfaCheckFinalState(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, diff --git a/src/rose/eod.c b/src/rose/eod.c index 1fa2c6dcc..93787137f 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -168,8 +168,12 @@ void roseCheckNfaEod(const struct RoseEngine *t, char *state, nfaExpandState(nfa, fstate, sstate, offset, key); } - nfaCheckFinalState(nfa, fstate, sstate, offset, scratch->tctxt.cb, - scratch->tctxt.cb_som, scratch->tctxt.userCtx); + if (nfaCheckFinalState(nfa, fstate, sstate, offset, scratch->tctxt.cb, + scratch->tctxt.cb_som, + scratch->tctxt.userCtx) == MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return; + } } } @@ -213,8 +217,13 @@ void roseCheckEodSuffixes(const struct RoseEngine *t, char *state, u64a offset, * history buffer. 
*/ char rv = nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); if (rv) { /* nfa is still alive */ - nfaCheckFinalState(nfa, fstate, sstate, offset, scratch->tctxt.cb, - scratch->tctxt.cb_som, scratch->tctxt.userCtx); + if (nfaCheckFinalState(nfa, fstate, sstate, offset, + scratch->tctxt.cb, scratch->tctxt.cb_som, + scratch->tctxt.userCtx) == + MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return; + } } } } From b36197df26b0534ab254629364d5c674d7f56c47 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 18 Jan 2016 09:18:19 +1100 Subject: [PATCH 053/218] roseEodRunMatcher: correct early return value --- src/rose/eod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rose/eod.c b/src/rose/eod.c index 93787137f..3ababb830 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -84,7 +84,7 @@ hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset, if (eod_len < t->eodmatcherMinWidth) { DEBUG_PRINTF("len=%zu < eodmatcherMinWidth=%u\n", eod_len, t->eodmatcherMinWidth); - return MO_CONTINUE_MATCHING; + return HWLM_CONTINUE_MATCHING; } // Ensure that we only need scan the last N bytes, where N is the length of From cca4116861716fa47b57a75527e62b0c32428642 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 11 Jan 2016 09:25:32 +1100 Subject: [PATCH 054/218] Move cyclic path redundancy into reduce loop Sometimes cyclic path redundancy can uncover further reduction work that can be done by the other passes in the reduce loop. --- src/nfagraph/ng.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index 758841819..bc3aea381 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -167,6 +167,7 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8, changed |= removeEdgeRedundancy(g, som, cc); changed |= reduceGraphEquivalences(g, cc); changed |= removeRedundancy(g, som); + changed |= removeCyclicPathRedundancy(g); if (!changed) { DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass); break; @@ -183,7 +184,6 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8, removeEdgeRedundancy(g, som, cc); } - removeCyclicPathRedundancy(g); removeCyclicDominated(g, som); if (!som) { From 70620327ccaeb746772f6abfbc329d02a711e680 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 13 Jan 2016 12:39:28 +1100 Subject: [PATCH 055/218] Remove RoseContext::userCtx All Rose callbacks receive scratch as their context. 
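[Editor's sketch] The change in the diff below is mechanical: every adaptor that used to forward tctxt->userCtx now recovers the enclosing scratch from the RoseContext and passes that as the callback context, which is why the userCtx member can be dropped from RoseContext in the scratch.h hunk at the end of this patch. A simplified before/after shape of the adaptors (the real versions also update match offsets and handle exhaustion and halt codes):

/* before: the user's context pointer was stored on RoseContext */
static int adaptor_before(u64a offset, ReportID id, void *context) {
    struct RoseContext *tctxt = context;
    return tctxt->cb(offset, id, tctxt->userCtx);
}

/* after: recover the enclosing scratch and pass it as the context */
static int adaptor_after(u64a offset, ReportID id, void *context) {
    struct RoseContext *tctxt = context;
    return tctxt->cb(offset, id, tctxtToScratch(tctxt));
}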
--- src/rose/block.c | 8 ++--- src/rose/catchup.c | 26 ++++++++-------- src/rose/eod.c | 13 ++++---- src/rose/program_runtime.h | 61 ++++++++++++++++---------------------- src/rose/rose.h | 13 ++++---- src/rose/rose_types.h | 4 ++- src/rose/stream.c | 4 +-- src/runtime.c | 7 ++--- src/scratch.h | 1 - 9 files changed, 57 insertions(+), 80 deletions(-) diff --git a/src/rose/block.c b/src/rose/block.c index a3174b637..3d4a008db 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -135,7 +135,7 @@ void init_outfixes_for_block(const struct RoseEngine *t, static really_inline void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback, - void *ctxt, char *state, char is_small_block) { + char *state, char is_small_block) { init_state_for_block(t, state); struct RoseContext *tctxt = &scratch->tctxt; @@ -147,7 +147,6 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, tctxt->filledDelayedSlots = 0; tctxt->cb = callback; tctxt->cb_som = som_callback; - tctxt->userCtx = ctxt; tctxt->lastMatchOffset = 0; tctxt->minMatchOffset = 0; tctxt->minNonMpvMatchOffset = 0; @@ -166,8 +165,7 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, } void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback, - void *ctx) { + RoseCallback callback, RoseCallbackSom som_callback) { assert(t); assert(scratch); assert(scratch->core_info.buf); @@ -185,7 +183,7 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, char *state = scratch->core_info.state; - init_for_block(t, scratch, callback, som_callback, ctx, state, + init_for_block(t, scratch, callback, som_callback, state, is_small_block); struct RoseContext *tctxt = &scratch->tctxt; diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 6893df0eb..b302fbdd7 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -338,7 +338,7 @@ int roseNfaFinalBlastAdaptor(u64a offset, ReportID id, void *context) { return MO_CONTINUE_MATCHING; } - int cb_rv = tctxt->cb(offset, id, tctxt->userCtx); + int cb_rv = tctxt->cb(offset, id, scratch); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -364,7 +364,7 @@ int roseNfaFinalBlastAdaptorNoInternal(u64a offset, ReportID id, offset, id); updateLastMatchOffset(tctxt, offset); - int cb_rv = tctxt->cb(offset, id, tctxt->userCtx); + int cb_rv = tctxt->cb(offset, id, scratch); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -544,7 +544,7 @@ int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { updateLastMatchOffset(tctxt, offset); - int cb_rv = tctxt->cb(offset, id, tctxt->userCtx); + int cb_rv = tctxt->cb(offset, id, scratch); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -574,7 +574,7 @@ int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { offset, id); updateLastMatchOffset(tctxt, offset); - int cb_rv = tctxt->cb(offset, id, tctxt->userCtx); + int cb_rv = tctxt->cb(offset, id, scratch); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -600,7 +600,7 @@ int roseNfaBlastAdaptorNoChain(u64a offset, ReportID id, void *context) { return MO_CONTINUE_MATCHING; } - int cb_rv = tctxt->cb(offset, id, tctxt->userCtx); 
+ int cb_rv = tctxt->cb(offset, id, scratch); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -624,7 +624,7 @@ int roseNfaBlastAdaptorNoInternalNoChain(u64a offset, ReportID id, offset, id); updateLastMatchOffset(tctxt, offset); - int cb_rv = tctxt->cb(offset, id, tctxt->userCtx); + int cb_rv = tctxt->cb(offset, id, scratch); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -656,7 +656,7 @@ int roseNfaBlastSomAdaptor(u64a from_offset, u64a offset, ReportID id, updateLastMatchOffset(tctxt, offset); /* must be a external report as haig cannot directly participate in chain */ - int cb_rv = tctxt->cb_som(from_offset, offset, id, tctxt->userCtx); + int cb_rv = tctxt->cb_som(from_offset, offset, id, scratch); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -679,7 +679,7 @@ int roseNfaAdaptor(u64a offset, ReportID id, void *context) { return MO_CONTINUE_MATCHING; } - int cb_rv = tctxt->cb(offset, id, tctxt->userCtx); + int cb_rv = tctxt->cb(offset, id, scratch); return cb_rv; } @@ -688,8 +688,7 @@ int roseNfaAdaptorNoInternal(u64a offset, ReportID id, void *context) { DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); updateLastMatchOffset(tctxt, offset); - int cb_rv = tctxt->cb(offset, id, tctxt->userCtx); - return cb_rv; + return tctxt->cb(offset, id, tctxtToScratch(tctxt)); } int roseNfaSomAdaptor(u64a from_offset, u64a offset, ReportID id, @@ -699,8 +698,7 @@ int roseNfaSomAdaptor(u64a from_offset, u64a offset, ReportID id, updateLastMatchOffset(tctxt, offset); /* must be a external report as haig cannot directly participate in chain */ - int cb_rv = tctxt->cb_som(from_offset, offset, id, tctxt->userCtx); - return cb_rv; + return tctxt->cb_som(from_offset, offset, id, tctxtToScratch(tctxt)); } static really_inline @@ -1155,7 +1153,7 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, goto next; } - if (tctxt->cb(anchored_end, anchored_report, tctxt->userCtx) + if (tctxt->cb(anchored_end, anchored_report, scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; @@ -1271,7 +1269,7 @@ hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch) { goto next; } - if (tctxt->cb(anchored_end, anchored_report, tctxt->userCtx) + if (tctxt->cb(anchored_end, anchored_report, scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; diff --git a/src/rose/eod.c b/src/rose/eod.c index 3ababb830..2e414ad7e 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -35,7 +35,7 @@ static really_inline void initContext(const struct RoseEngine *t, char *state, u64a offset, struct hs_scratch *scratch, RoseCallback callback, - RoseCallbackSom som_callback, void *ctx) { + RoseCallbackSom som_callback) { struct RoseContext *tctxt = &scratch->tctxt; tctxt->groups = loadGroups(t, state); /* TODO: diff groups for eod */ tctxt->lit_offset_adjust = scratch->core_info.buf_offset @@ -46,7 +46,6 @@ void initContext(const struct RoseEngine *t, char *state, u64a offset, tctxt->filledDelayedSlots = 0; tctxt->cb = callback; tctxt->cb_som = som_callback; - tctxt->userCtx = ctx; tctxt->lastMatchOffset = 0; tctxt->minMatchOffset = 0; tctxt->minNonMpvMatchOffset = 0; @@ -170,7 +169,7 @@ void roseCheckNfaEod(const struct RoseEngine *t, char *state, if (nfaCheckFinalState(nfa, fstate, 
sstate, offset, scratch->tctxt.cb, scratch->tctxt.cb_som, - scratch->tctxt.userCtx) == MO_HALT_MATCHING) { + scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("user instructed us to stop\n"); return; } @@ -219,8 +218,7 @@ void roseCheckEodSuffixes(const struct RoseEngine *t, char *state, u64a offset, if (rv) { /* nfa is still alive */ if (nfaCheckFinalState(nfa, fstate, sstate, offset, scratch->tctxt.cb, scratch->tctxt.cb_som, - scratch->tctxt.userCtx) == - MO_HALT_MATCHING) { + scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("user instructed us to stop\n"); return; } @@ -298,10 +296,9 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, void roseEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch, RoseCallback callback, - RoseCallbackSom som_callback, void *context) { + RoseCallbackSom som_callback) { assert(scratch); assert(callback); - assert(context); assert(t->requiresEodCheck); DEBUG_PRINTF("ci buf %p/%zu his %p/%zu\n", scratch->core_info.buf, scratch->core_info.len, scratch->core_info.hbuf, @@ -317,7 +314,7 @@ void roseEodExec(const struct RoseEngine *t, u64a offset, char *state = scratch->core_info.state; assert(state); - initContext(t, state, offset, scratch, callback, som_callback, context); + initContext(t, state, offset, scratch, callback, som_callback); roseEodExec_i(t, state, offset, scratch, 1); } diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 309fee5ba..f76689f4a 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -205,6 +205,22 @@ char rosePrefixCheckMiracles(const struct RoseEngine *t, return 1; } +static rose_inline +hwlmcb_rv_t roseHaltIfExhausted(const struct RoseEngine *t, + struct hs_scratch *scratch) { + struct core_info *ci = &scratch->core_info; + if (isAllExhausted(t, ci->exhaustionVector)) { + if (!ci->broken) { + ci->broken = BROKEN_EXHAUSTED; + } + scratch->tctxt.groups = 0; + DEBUG_PRINTF("all exhausted, termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + return HWLM_CONTINUE_MATCHING; +} + static really_inline hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, s64a loc, @@ -266,16 +282,7 @@ hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, assert(!isQueueFull(q)); - if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { - if (!scratch->core_info.broken) { - scratch->core_info.broken = BROKEN_EXHAUSTED; - } - tctxt->groups = 0; - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; + return roseHaltIfExhausted(t, scratch); } static really_inline @@ -575,7 +582,7 @@ hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, char *state, DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end); updateLastMatchOffset(tctxt, end); - int cb_rv = tctxt->cb(end, id, tctxt->userCtx); + int cb_rv = tctxt->cb(end, id, scratch); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; @@ -585,16 +592,7 @@ hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, char *state, return HWLM_CONTINUE_MATCHING; } - if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { - if (!scratch->core_info.broken) { - scratch->core_info.broken = BROKEN_EXHAUSTED; - } - tctxt->groups = 0; - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; + return roseHaltIfExhausted(t, scratch); } /* catches up engines enough to ensure any earlier mpv 
triggers are enqueued @@ -665,7 +663,9 @@ static rose_inline hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state, ReportID id, u64a start, u64a end, struct RoseContext *tctxt, char in_anchored) { - if (roseCatchUpTo(t, state, end, tctxtToScratch(tctxt), in_anchored) + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + if (roseCatchUpTo(t, state, end, scratch, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -676,7 +676,7 @@ hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state, assert(end == tctxt->minMatchOffset); updateLastMatchOffset(tctxt, end); - int cb_rv = tctxt->cb_som(start, end, id, tctxt->userCtx); + int cb_rv = tctxt->cb_som(start, end, id, scratch); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; @@ -686,17 +686,7 @@ hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state, return HWLM_CONTINUE_MATCHING; } - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - if (isAllExhausted(t, ci->exhaustionVector)) { - if (!ci->broken) { - ci->broken = BROKEN_EXHAUSTED; - } - tctxt->groups = 0; - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; + return roseHaltIfExhausted(t, scratch); } static rose_inline @@ -1064,8 +1054,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_EOD) { - if (tctxt->cb(end, ri->report, tctxt->userCtx) == - MO_HALT_MATCHING) { + if (tctxt->cb(end, ri->report, scratch) == MO_HALT_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; diff --git a/src/rose/rose.h b/src/rose/rose.h index c855795e4..16cfa4353 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -42,8 +42,7 @@ void roseInitState(const struct RoseEngine *t, char *state); void roseBlockEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch); void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback, - void *context); + RoseCallback callback, RoseCallbackSom som_callback); static really_inline int roseBlockHasEodWork(const struct RoseEngine *t, @@ -81,8 +80,7 @@ int roseBlockHasEodWork(const struct RoseEngine *t, /* assumes core_info in scratch has been init to point to data */ static really_inline void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback, - void *context) { + RoseCallback callback, RoseCallbackSom som_callback) { assert(t); assert(scratch); assert(scratch->core_info.buf); @@ -99,7 +97,7 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF || length <= t->maxBiAnchoredWidth); - roseBlockExec_i(t, scratch, callback, som_callback, context); + roseBlockExec_i(t, scratch, callback, som_callback); if (!t->requiresEodCheck) { return; @@ -120,12 +118,11 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, /* assumes core_info in scratch has been init to point to data */ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback, - void *context); + RoseCallback callback, RoseCallbackSom som_callback); void roseEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch, RoseCallback callback, - RoseCallbackSom som_callback, void *context); + RoseCallbackSom 
som_callback); #define ROSE_CONTINUE_MATCHING_NO_EXHAUST 2 diff --git a/src/rose/rose_types.h b/src/rose/rose_types.h index 1b80b2b20..1ba453fe2 100644 --- a/src/rose/rose_types.h +++ b/src/rose/rose_types.h @@ -33,7 +33,9 @@ struct RoseEngine; -// Note: identical signature to NfaCallback +// Note: identical signature to NfaCallback, but all Rose callbacks must be +// passed scratch as their context ptr. + typedef int (*RoseCallback)(u64a offset, ReportID id, void *context); typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id, void *context); diff --git a/src/rose/stream.c b/src/rose/stream.c index 981f00181..71984e92f 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -424,8 +424,7 @@ void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable, } void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback, - void *ctx) { + RoseCallback callback, RoseCallbackSom som_callback) { DEBUG_PRINTF("OH HAI\n"); assert(t); assert(scratch->core_info.hbuf); @@ -458,7 +457,6 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, tctxt->filledDelayedSlots = 0; tctxt->cb = callback; tctxt->cb_som = som_callback; - tctxt->userCtx = ctx; tctxt->lastMatchOffset = 0; tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; diff --git a/src/runtime.c b/src/runtime.c index 8890e53f9..5276acf6e 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -687,7 +687,7 @@ void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) { DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len); roseBlockExec(rose, scratch, selectAdaptor(rose), - selectSomAdaptor(rose), scratch); + selectSomAdaptor(rose)); } static really_inline @@ -1018,7 +1018,7 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { } roseEodExec(rose, id->offset, scratch, selectAdaptor(rose), - selectSomAdaptor(rose), scratch); + selectSomAdaptor(rose)); } static never_inline @@ -1190,8 +1190,7 @@ void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { const struct RoseEngine *rose = stream_state->rose; assert(rose); - roseStreamExec(rose, scratch, selectAdaptor(rose), selectSomAdaptor(rose), - scratch); + roseStreamExec(rose, scratch, selectAdaptor(rose), selectSomAdaptor(rose)); if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { diff --git a/src/scratch.h b/src/scratch.h index fa112a568..150db3f24 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -122,7 +122,6 @@ struct RoseContext { * match, cleared if top events arrive */ RoseCallback cb; RoseCallbackSom cb_som; - void *userCtx; u32 filledDelayedSlots; u32 curr_anchored_loc; /**< last read/written row */ u32 curr_row_offset; /**< last read/written entry */ From 4feabf7bd6e32990f6963517676c89e99c8fffa1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 13 Jan 2016 13:20:38 +1100 Subject: [PATCH 056/218] Make Rose callback types explicitly take scratch --- src/rose/eod.c | 31 ++++++++++++-- src/rose/rose_types.h | 11 +++-- src/runtime.c | 94 +++++++++++++++++++++++++++++++++---------- 3 files changed, 105 insertions(+), 31 deletions(-) diff --git a/src/rose/eod.c b/src/rose/eod.c index 2e414ad7e..7bbf8faf1 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -120,6 +120,29 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset, return MO_CONTINUE_MATCHING; } +/** + * \brief Adapts an NfaCallback to the rose callback specified in the + * 
RoseContext. + */ +static +int eodNfaCallback(u64a offset, ReportID report, void *context) { + struct hs_scratch *scratch = context; + assert(scratch->magic == SCRATCH_MAGIC); + return scratch->tctxt.cb(offset, report, scratch); +} + +/** + * \brief Adapts a SomNfaCallback to the rose SOM callback specified in the + * RoseContext. + */ +static +int eodNfaSomCallback(u64a from_offset, u64a to_offset, ReportID report, + void *context) { + struct hs_scratch *scratch = context; + assert(scratch->magic == SCRATCH_MAGIC); + return scratch->tctxt.cb_som(from_offset, to_offset, report, scratch); +} + /** * \brief Check for (and deliver) reports from active output-exposed (suffix * or outfix) NFAs. @@ -167,8 +190,8 @@ void roseCheckNfaEod(const struct RoseEngine *t, char *state, nfaExpandState(nfa, fstate, sstate, offset, key); } - if (nfaCheckFinalState(nfa, fstate, sstate, offset, scratch->tctxt.cb, - scratch->tctxt.cb_som, + if (nfaCheckFinalState(nfa, fstate, sstate, offset, eodNfaCallback, + eodNfaSomCallback, scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("user instructed us to stop\n"); return; @@ -216,8 +239,8 @@ void roseCheckEodSuffixes(const struct RoseEngine *t, char *state, u64a offset, * history buffer. */ char rv = nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); if (rv) { /* nfa is still alive */ - if (nfaCheckFinalState(nfa, fstate, sstate, offset, - scratch->tctxt.cb, scratch->tctxt.cb_som, + if (nfaCheckFinalState(nfa, fstate, sstate, offset, eodNfaCallback, + eodNfaSomCallback, scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("user instructed us to stop\n"); return; diff --git a/src/rose/rose_types.h b/src/rose/rose_types.h index 1ba453fe2..b70328d25 100644 --- a/src/rose/rose_types.h +++ b/src/rose/rose_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,13 +31,12 @@ #include "ue2common.h" -struct RoseEngine; +struct hs_scratch; -// Note: identical signature to NfaCallback, but all Rose callbacks must be -// passed scratch as their context ptr. +typedef int (*RoseCallback)(u64a offset, ReportID id, + struct hs_scratch *scratch); -typedef int (*RoseCallback)(u64a offset, ReportID id, void *context); typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id, - void *context); + struct hs_scratch *scratch); #endif diff --git a/src/runtime.c b/src/runtime.c index 5276acf6e..d51db18b3 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -171,11 +171,12 @@ void setBroken(char *state, u8 broken) { } static really_inline -int roseAdaptor_i(u64a offset, ReportID id, void *context, char is_simple, - char do_som) { +int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, + char is_simple, char do_som) { assert(id != MO_INVALID_IDX); // Should never get an invalid ID. + assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); - struct hs_scratch *scratch = (struct hs_scratch *)context; struct core_info *ci = &scratch->core_info; const struct RoseEngine *rose = ci->rose; DEBUG_PRINTF("internal report %u\n", id); @@ -326,12 +327,13 @@ int roseAdaptor_i(u64a offset, ReportID id, void *context, char is_simple, static really_inline int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, - void *context, char is_simple) { + struct hs_scratch *scratch, char is_simple) { assert(id != MO_INVALID_IDX); // Should never get an invalid ID. 
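
The eodNfaCallback/eodNfaSomCallback shims above follow a common bridging
pattern: the NFA engine only knows an untyped void *context, so a thin
wrapper casts it back to scratch (guarded by the magic check) and forwards
to the strongly typed Rose callback. The same pattern in isolation, with
stand-in types and a made-up magic value:

    #include <assert.h>

    typedef unsigned long long u64a;
    typedef unsigned int ReportID;

    #define SKETCH_MAGIC 0x12345678U /* stand-in for SCRATCH_MAGIC */

    struct hs_scratch;
    typedef int (*RoseCallback)(u64a offset, ReportID id,
                                struct hs_scratch *scratch);

    struct hs_scratch {
        unsigned int magic;
        RoseCallback cb;
    };

    /* NfaCallback-shaped entry point: context is really the scratch. */
    static int nfaShim(u64a offset, ReportID id, void *context) {
        struct hs_scratch *scratch = context;
        assert(scratch->magic == SKETCH_MAGIC); /* catch bogus contexts */
        return scratch->cb(offset, id, scratch);
    }
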
+ assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); u32 flags = 0; - struct hs_scratch *scratch = (struct hs_scratch *)context; struct core_info *ci = &scratch->core_info; const struct RoseEngine *rose = ci->rose; const struct internal_report *ri = getInternalReport(rose, id); @@ -488,8 +490,8 @@ hwlmcb_rv_t multiDirectAdaptor(u64a real_end, ReportID direct_id, void *context, } static -int roseAdaptor(u64a offset, ReportID id, void *context) { - return roseAdaptor_i(offset, id, context, 0, 0); +int roseAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { + return roseAdaptor_i(offset, id, scratch, 0, 0); } static @@ -513,8 +515,8 @@ hwlmcb_rv_t hwlmAdaptor(UNUSED size_t start, size_t end, u32 direct_id, } static -int roseSimpleAdaptor(u64a offset, ReportID id, void *context) { - return roseAdaptor_i(offset, id, context, 1, 0); +int roseSimpleAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { + return roseAdaptor_i(offset, id, scratch, 1, 0); } static @@ -539,8 +541,8 @@ hwlmcb_rv_t hwlmSimpleAdaptor(UNUSED size_t start, size_t end, u32 direct_id, } static -int roseSomAdaptor(u64a offset, ReportID id, void *context) { - return roseAdaptor_i(offset, id, context, 0, 1); +int roseSomAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { + return roseAdaptor_i(offset, id, scratch, 0, 1); } static @@ -564,8 +566,8 @@ hwlmcb_rv_t hwlmSomAdaptor(UNUSED size_t start, size_t end, u32 direct_id, } static -int roseSimpleSomAdaptor(u64a offset, ReportID id, void *context) { - return roseAdaptor_i(offset, id, context, 1, 1); +int roseSimpleSomAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { + return roseAdaptor_i(offset, id, scratch, 1, 1); } static @@ -614,14 +616,14 @@ HWLMCallback selectHwlmAdaptor(const struct RoseEngine *rose) { static int roseSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id, - void *context) { - return roseSomAdaptor_i(from_offset, to_offset, id, context, 0); + struct hs_scratch *scratch) { + return roseSomAdaptor_i(from_offset, to_offset, id, scratch, 0); } static int roseSimpleSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id, - void *context) { - return roseSomAdaptor_i(from_offset, to_offset, id, context, 1); + struct hs_scratch *scratch) { + return roseSomAdaptor_i(from_offset, to_offset, id, scratch, 1); } static really_inline @@ -631,6 +633,56 @@ RoseCallbackSom selectSomAdaptor(const struct RoseEngine *rose) { return is_simple ? roseSimpleSomSomAdaptor : roseSomSomAdaptor; } +static +int outfixSimpleSomAdaptor(u64a offset, ReportID id, void *context) { + return roseAdaptor_i(offset, id, context, 1, 1); +} + +static +int outfixSimpleAdaptor(u64a offset, ReportID id, void *context) { + return roseAdaptor_i(offset, id, context, 1, 0); +} + +static +int outfixSomAdaptor(u64a offset, ReportID id, void *context) { + return roseAdaptor_i(offset, id, context, 0, 1); +} + +static +int outfixAdaptor(u64a offset, ReportID id, void *context) { + return roseAdaptor_i(offset, id, context, 0, 0); +} + +static really_inline +NfaCallback selectOutfixAdaptor(const struct RoseEngine *rose) { + const char is_simple = rose->simpleCallback; + const char do_som = rose->hasSom; + + if (do_som) { + return is_simple ? outfixSimpleSomAdaptor : outfixSomAdaptor; + } else { + return is_simple ? 
outfixSimpleAdaptor : outfixAdaptor; + } +} + +static +int outfixSimpleSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id, + void *context) { + return roseSomAdaptor_i(from_offset, to_offset, id, context, 1); +} + +static +int outfixSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id, + void *context) { + return roseSomAdaptor_i(from_offset, to_offset, id, context, 0); +} + +static really_inline +SomNfaCallback selectOutfixSomAdaptor(const struct RoseEngine *rose) { + const char is_simple = rose->simpleCallback; + return is_simple ? outfixSimpleSomSomAdaptor : outfixSomSomAdaptor; +} + static never_inline void processReportList(const struct RoseEngine *rose, u32 base_offset, u64a stream_offset, hs_scratch_t *scratch) { @@ -720,8 +772,8 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, q->length = scratch->core_info.len; q->history = scratch->core_info.hbuf; q->hlength = scratch->core_info.hlen; - q->cb = selectAdaptor(t); - q->som_cb = selectSomAdaptor(t); + q->cb = selectOutfixAdaptor(t); + q->som_cb = selectOutfixSomAdaptor(t); q->context = scratch; q->report_current = 0; @@ -792,10 +844,10 @@ void runSmallWriteEngine(const struct SmallWriteEngine *smwr, assert(isMcClellanType(nfa->type)); if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer, - local_alen, selectAdaptor(rose), scratch); + local_alen, selectOutfixAdaptor(rose), scratch); } else { nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer, - local_alen, selectAdaptor(rose), scratch); + local_alen, selectOutfixAdaptor(rose), scratch); } } From 435b08b984edea8fae7dba437cad8995635f859b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 14 Jan 2016 13:45:44 +1100 Subject: [PATCH 057/218] Docs for Rose callback types --- src/rose/rose.h | 2 -- src/rose/rose_types.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/rose/rose.h b/src/rose/rose.h index 16cfa4353..95750363c 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -124,6 +124,4 @@ void roseEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback); -#define ROSE_CONTINUE_MATCHING_NO_EXHAUST 2 - #endif // ROSE_H diff --git a/src/rose/rose_types.h b/src/rose/rose_types.h index b70328d25..9dcef1cef 100644 --- a/src/rose/rose_types.h +++ b/src/rose/rose_types.h @@ -26,6 +26,10 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/** \file + * \brief Rose runtime types (callbacks, etc). + */ + #ifndef ROSE_TYPES_H #define ROSE_TYPES_H @@ -33,9 +37,34 @@ struct hs_scratch; +/** + * \brief Continue without checking for exhaustion. + * + * \ref RoseCallback return value indicating that execution should continue and + * that it is not necessary to check if all reports have been exhausted. + */ +#define ROSE_CONTINUE_MATCHING_NO_EXHAUST 2 + +/** + * \brief The type for a Rose callback. + * + * \return + * - \ref MO_HALT_MATCHING if matching should terminate; + * - \ref MO_CONTINUE_MATCHING if matching should continue; + * - \ref ROSE_CONTINUE_MATCHING_NO_EXHAUST if matching should continue and no + * exhaustion is possible. + */ typedef int (*RoseCallback)(u64a offset, ReportID id, struct hs_scratch *scratch); +/** + * \brief The type for a Rose callback which also tracks start of match. + * + * Behaves just like \ref RoseCallback except that it is provided with both a + * start and an end offset. 
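
As a usage sketch of the contract being documented here: a callback that can
never set an exhaustion key may return ROSE_CONTINUE_MATCHING_NO_EXHAUST so
that its caller skips the exhaustion-vector sweep. Types are simplified and
the MO_* values are assumptions for the sketch (the real definitions live in
ue2common.h):

    #define MO_HALT_MATCHING 0 /* assumed values */
    #define MO_CONTINUE_MATCHING 1
    #define ROSE_CONTINUE_MATCHING_NO_EXHAUST 2

    typedef unsigned long long u64a;
    typedef unsigned int ReportID;
    struct hs_scratch { int stop_requested; };

    static int sketchCb(u64a offset, ReportID id, struct hs_scratch *scratch) {
        (void)offset; (void)id; /* a real callback would record these */
        if (scratch->stop_requested) {
            return MO_HALT_MATCHING; /* terminate the whole scan */
        }
        /* this callback sets no exhaustion keys, so the caller may
         * safely skip its isAllExhausted() check */
        return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
    }
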
+ * + * \see RoseCallback + */ typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id, struct hs_scratch *scratch); From 9eb328b455bfe0cc0bdf5e577fcc38213c54f45f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 14 Jan 2016 16:55:39 +1100 Subject: [PATCH 058/218] RoseRuntimeState no longer needs to be packed This structure only contains u8 values now. In the future we may wish to eliminate it entirely and store the few bits we need more directly. --- src/rose/rose_build_bytecode.cpp | 11 ++++++++--- src/rose/rose_internal.h | 8 -------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c640f0912..a042eb2c5 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -513,9 +513,14 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, u32 activeLeftCount, u32 laggedRoseCount, u32 floatingStreamStateRequired, u32 historyRequired, RoseStateOffsets *so) { - /* runtime state (including role state) first and needs to be u32-aligned */ - u32 curr_offset = sizeof(RoseRuntimeState) - + mmbit_size(rolesWithStateCount); + u32 curr_offset = 0; + + // First, runtime state (stores per-stream state, like whether we need a + // delay rebuild or have been told to halt matching.) + curr_offset += sizeof(RoseRuntimeState); + + // Role state storage. + curr_offset += mmbit_size(rolesWithStateCount); so->activeLeafArray = curr_offset; /* TODO: limit size of array */ curr_offset += mmbit_size(activeArrayCount); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index a1f91cd39..326887da8 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -476,19 +476,11 @@ struct RoseEngine { struct scatter_full_plan state_init; }; -#if defined(_WIN32) -#pragma pack(push, 1) -#endif // Rose runtime state struct RoseRuntimeState { u8 flags; /* high bit true if delay rebuild needed */ u8 broken; /* user has requested that we stop matching */ -#if defined(_WIN32) }; -#pragma pack(pop) -#else -} __attribute__((packed)); -#endif struct ALIGN_CL_DIRECTIVE anchored_matcher_info { u32 next_offset; /* relative to this, 0 for end */ From 94b33421cafcbc952d3bcb6659f7a5b18313e780 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 28 Jan 2016 17:16:53 +1100 Subject: [PATCH 059/218] ng_filter: Fix bug introduced in 98eff64 If the max width is modified for a region, use the modified version when checking to see if a self-loop must be added on the last vertex. --- src/nfagraph/ng_prefilter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index c0caf1b9e..c2b9eea9b 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ b/src/nfagraph/ng_prefilter.cpp @@ -285,7 +285,7 @@ void replaceRegion(NGHolder &g, const RegionInfo &ri, verts.push_back(v); } - if (ri.maxWidth.is_infinite()) { + if (maxWidth.is_infinite()) { add_edge(verts.back(), verts.back(), g); } From 060defe6c45fd756888ebfcf98cfb70af4e829c3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 18 Jan 2016 11:56:01 +1100 Subject: [PATCH 060/218] Rose: move more report handling work into program Move report preconditions (bounds, exhaustion, etc) into program instructions and use a more direct path to the user match callback than the adaptor functions. Report handling has been moved to new file src/report.h. Reporting from EOD now uses the same instructions as normal report handling, rather than its own. 
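
In outline, such a report program is a linear instruction sequence in which
each precondition check jumps forward on failure, skipping the report it
guards. A stripped-down interpreter conveying the idea; the opcode names
echo this patch, but the encoding, operand layout and jump units are
simplified stand-ins for the real bytecode:

    #include <stdio.h>

    typedef unsigned long long u64a;

    enum OpCode { OP_CHECK_BOUNDS, OP_REPORT, OP_END };

    struct Instr {
        enum OpCode code;
        u64a min_bound, max_bound; /* OP_CHECK_BOUNDS only */
        unsigned fail_jump;        /* forward skip, in instructions */
        unsigned report;           /* OP_REPORT only */
    };

    static void runProgram(const struct Instr *prog, u64a end) {
        for (size_t pc = 0;;) {
            const struct Instr *ri = &prog[pc];
            switch (ri->code) {
            case OP_CHECK_BOUNDS:
                if (end < ri->min_bound || end > ri->max_bound) {
                    pc += ri->fail_jump; /* failed: skip the report */
                    continue;
                }
                break;
            case OP_REPORT:
                printf("report %u at offset %llu\n", ri->report, end);
                break;
            case OP_END:
                return;
            }
            pc++;
        }
    }

A bounded report then compiles to CHECK_BOUNDS (with fail_jump pointing past
the report), REPORT, END; an unconditional report simply omits the check.
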
Jump target tracking in rose_build_bytecode.cpp has been cleaned up. --- CMakeLists.txt | 1 + src/report.h | 531 +++++++++++++++++++++++++++++++ src/rose/eod.c | 32 +- src/rose/match.c | 48 ++- src/rose/program_runtime.h | 255 ++++++++------- src/rose/rose_build_bytecode.cpp | 459 +++++++++++++++++++------- src/rose/rose_dump.cpp | 123 ++++++- src/rose/rose_program.h | 85 ++++- src/runtime.c | 314 +----------------- src/som/som_runtime.c | 3 +- src/util/report.h | 7 +- 11 files changed, 1279 insertions(+), 579 deletions(-) create mode 100644 src/report.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1abab0fe0..7ae459d84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -384,6 +384,7 @@ set (hs_exec_SRCS src/ue2common.h src/alloc.c src/allocator.h + src/report.h src/runtime.c src/fdr/fdr.c src/fdr/fdr.h diff --git a/src/report.h b/src/report.h new file mode 100644 index 000000000..2fff3b9a4 --- /dev/null +++ b/src/report.h @@ -0,0 +1,531 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Runtime functions to do with reports, inlined into callers. + */ + +#ifndef REPORT_H +#define REPORT_H + +#include "hs_internal.h" +#include "hs_runtime.h" +#include "scratch.h" +#include "ue2common.h" +#include "nfa/callback.h" +#include "nfa/nfa_internal.h" +#include "rose/runtime.h" +#include "som/som_runtime.h" +#include "util/exhaust.h" +#include "util/fatbit.h" +#include "util/internal_report.h" + +static really_inline +int satisfiesMinLength(u64a min_len, u64a from_offset, + u64a to_offset) { + assert(min_len); + + if (from_offset == HS_OFFSET_PAST_HORIZON) { + DEBUG_PRINTF("SOM beyond horizon\n"); + return 1; + } + + DEBUG_PRINTF("match len=%llu, min len=%llu\n", to_offset - from_offset, + min_len); + return to_offset - from_offset >= min_len; +} + +enum DedupeResult { + DEDUPE_CONTINUE, //!< Continue with match, not a dupe. + DEDUPE_SKIP, //!< Don't report this match, dupe or delayed due to SOM. + DEDUPE_HALT //!< User instructed us to stop matching. 
+}; + +static really_inline +enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, + const struct internal_report *ri, + struct hs_scratch *scratch, u64a offset, + u64a from_offset, u64a to_offset, + const char do_som) { + DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u, do_som=%d\n", offset, + from_offset, to_offset, ri->dkey, do_som); + DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom); + const u32 dkey = ri->dkey; + if (!do_som && dkey == MO_INVALID_IDX) { + DEBUG_PRINTF("nothing to do\n"); + return DEDUPE_CONTINUE; + } + + struct match_deduper *deduper = &scratch->deduper; + if (offset != deduper->current_report_offset) { + assert(deduper->current_report_offset == ~0ULL || + deduper->current_report_offset < offset); + if (offset == deduper->current_report_offset + 1) { + fatbit_clear(deduper->log[offset % 2]); + } else { + fatbit_clear(deduper->log[0]); + fatbit_clear(deduper->log[1]); + } + + if (do_som && flushStoredSomMatches(scratch, offset)) { + return DEDUPE_HALT; + } + deduper->current_report_offset = offset; + } + + if (dkey != MO_INVALID_IDX) { + const u32 dkeyCount = rose->dkeyCount; + const s32 offset_adj = ri->offsetAdjust; + if (ri->type == EXTERNAL_CALLBACK || ri->quashSom) { + DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); + assert(offset_adj == 0 || offset_adj == -1); + if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) { + /* we have already raised this report at this offset, squash + * dupe match. */ + DEBUG_PRINTF("dedupe\n"); + return DEDUPE_SKIP; + } + } else if (do_som) { + /* SOM external event */ + DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); + assert(offset_adj == 0 || offset_adj == -1); + u64a *starts = deduper->som_start_log[to_offset % 2]; + if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) { + starts[dkey] = MIN(starts[dkey], from_offset); + } else { + starts[dkey] = from_offset; + } + DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]); + + if (offset_adj) { + deduper->som_log_dirty |= 1; + } else { + deduper->som_log_dirty |= 2; + } + + return DEDUPE_SKIP; + } + } + + return DEDUPE_CONTINUE; +} + +static really_inline +enum DedupeResult dedupeCatchupSom(const struct RoseEngine *rose, + const struct internal_report *ri, + struct hs_scratch *scratch, u64a offset, + u64a from_offset, u64a to_offset) { + DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u\n", offset, + from_offset, to_offset, ri->dkey); + DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom); + + struct match_deduper *deduper = &scratch->deduper; + if (offset != deduper->current_report_offset) { + assert(deduper->current_report_offset == ~0ULL || + deduper->current_report_offset < offset); + if (offset == deduper->current_report_offset + 1) { + fatbit_clear(deduper->log[offset % 2]); + } else { + fatbit_clear(deduper->log[0]); + fatbit_clear(deduper->log[1]); + } + + if (flushStoredSomMatches(scratch, offset)) { + return DEDUPE_HALT; + } + deduper->current_report_offset = offset; + } + + const u32 dkey = ri->dkey; + if (dkey != MO_INVALID_IDX) { + const u32 dkeyCount = rose->dkeyCount; + const s32 offset_adj = ri->offsetAdjust; + if (ri->quashSom) { + DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); + assert(offset_adj == 0 || offset_adj == -1); + if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) { + /* we have already raised this report at this offset, squash + * dupe match. 
*/ + DEBUG_PRINTF("dedupe\n"); + return DEDUPE_SKIP; + } + } else { + /* SOM external event */ + DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); + assert(offset_adj == 0 || offset_adj == -1); + u64a *starts = deduper->som_start_log[to_offset % 2]; + if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) { + starts[dkey] = MIN(starts[dkey], from_offset); + } else { + starts[dkey] = from_offset; + } + DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]); + + if (offset_adj) { + deduper->som_log_dirty |= 1; + } else { + deduper->som_log_dirty |= 2; + } + + return DEDUPE_SKIP; + } + } + + return DEDUPE_CONTINUE; +} + +static really_inline +int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, + char is_simple, char do_som) { + assert(id != MO_INVALID_IDX); // Should never get an invalid ID. + assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + struct core_info *ci = &scratch->core_info; + const struct RoseEngine *rose = ci->rose; + DEBUG_PRINTF("internal report %u\n", id); + const struct internal_report *ri = getInternalReport(rose, id); + + assert(isExternalReport(ri)); /* only external reports should reach here */ + + s32 offset_adj = ri->offsetAdjust; + u64a to_offset = offset; + u64a from_offset = 0; + + u32 flags = 0; +#ifndef RELEASE_BUILD + if (offset_adj) { + // alert testing tools that we've got adjusted matches + flags |= HS_MATCH_FLAG_ADJUSTED; + } +#endif + + DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " + "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch, + offset_adj); + + if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */ + DEBUG_PRINTF("pre broken - halting\n"); + return MO_HALT_MATCHING; + } + + if (!is_simple && ri->hasBounds) { + assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); + assert(ri->minOffset <= ri->maxOffset); + if (offset < ri->minOffset || offset > ri->maxOffset) { + DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", + offset, ri->minOffset, ri->maxOffset); + return ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } + } + + if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { + DEBUG_PRINTF("ate exhausted match\n"); + return MO_CONTINUE_MATCHING; + } + + if (ri->type == EXTERNAL_CALLBACK) { + from_offset = 0; + } else if (do_som) { + from_offset = handleSomExternal(scratch, ri, to_offset); + } + + to_offset += offset_adj; + assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); + + if (do_som && ri->minLength) { + if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) { + return ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } + if (ri->quashSom) { + from_offset = 0; + } + } + + DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", + from_offset, to_offset, ri->onmatch, ci->userContext); + + int halt = 0; + + enum DedupeResult dedupe_rv = dedupeCatchup(rose, ri, scratch, offset, + from_offset, to_offset, do_som); + switch (dedupe_rv) { + case DEDUPE_HALT: + halt = 1; + goto exit; + case DEDUPE_SKIP: + halt = 0; + goto exit; + case DEDUPE_CONTINUE: + break; + } + + halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, + flags, ci->userContext); +exit: + if (halt) { + DEBUG_PRINTF("callback requested to terminate matches\n"); + ci->broken = BROKEN_FROM_USER; + return MO_HALT_MATCHING; + } + + if (!is_simple && ri->ekey != END_EXHAUST) { + markAsMatched(ci->exhaustionVector, ri->ekey); + return MO_CONTINUE_MATCHING; + } else { + return 
ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } +} + +/** + * \brief Deliver the given report to the user callback. + * + * Assumes all preconditions (bounds, exhaustion etc) have been checked and + * that dedupe catchup has been done. + */ +static really_inline +int roseDeliverReport(u64a offset, ReportID id, struct hs_scratch *scratch, + char is_exhaustible) { + assert(id != MO_INVALID_IDX); // Should never get an invalid ID. + assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + struct core_info *ci = &scratch->core_info; + const struct RoseEngine *rose = ci->rose; + DEBUG_PRINTF("internal report %u\n", id); + const struct internal_report *ri = getInternalReport(rose, id); + + assert(isExternalReport(ri)); /* only external reports should reach here */ + + const s32 offset_adj = ri->offsetAdjust; + u32 flags = 0; +#ifndef RELEASE_BUILD + if (offset_adj) { + // alert testing tools that we've got adjusted matches + flags |= HS_MATCH_FLAG_ADJUSTED; + } +#endif + + DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " + "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch, + offset_adj); + + assert(!can_stop_matching(scratch)); + assert(!ri->hasBounds || + (offset >= ri->minOffset && offset <= ri->maxOffset)); + assert(ri->type == EXTERNAL_CALLBACK); + assert(!ri->minLength); + assert(!ri->quashSom); + assert(ri->ekey == INVALID_EKEY || + !isExhausted(ci->exhaustionVector, ri->ekey)); + + u64a from_offset = 0; + u64a to_offset = offset + offset_adj; + + DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", + from_offset, to_offset, ri->onmatch, ci->userContext); + + int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, + to_offset, flags, ci->userContext); + if (halt) { + DEBUG_PRINTF("callback requested to terminate matches\n"); + ci->broken = BROKEN_FROM_USER; + return MO_HALT_MATCHING; + } + + if (is_exhaustible) { + assert(ri->ekey != INVALID_EKEY); + markAsMatched(ci->exhaustionVector, ri->ekey); + return MO_CONTINUE_MATCHING; + } else { + return ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } +} + +static really_inline +int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, + struct hs_scratch *scratch, char is_simple) { + assert(id != MO_INVALID_IDX); // Should never get an invalid ID. 
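
The deduper->log[to_offset % 2] indexing in dedupeCatchup() above is worth
unpacking: because reports may carry an offset adjustment of -1, two
adjacent offsets can have live dedupe state at once, so the deduper keeps
one bitset per offset parity. A sketch of the rotation, with a plain byte
array standing in for fatbit:

    #include <string.h>

    typedef unsigned long long u64a;

    #define DKEYS 64 /* illustrative dedupe-key count */

    struct deduper_sketch {
        u64a current_report_offset;
        unsigned char log[2][DKEYS]; /* one slot per offset parity */
    };

    static void rotateLogs(struct deduper_sketch *d, u64a offset) {
        if (offset == d->current_report_offset + 1) {
            /* adjacent offset: the other parity slot is still live, so
             * clear only the slot about to be reused */
            memset(d->log[offset % 2], 0, DKEYS);
        } else if (offset != d->current_report_offset) {
            memset(d->log, 0, sizeof(d->log)); /* jumped: clear both */
        }
        d->current_report_offset = offset;
    }
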
+ assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + u32 flags = 0; + + struct core_info *ci = &scratch->core_info; + const struct RoseEngine *rose = ci->rose; + const struct internal_report *ri = getInternalReport(rose, id); + + /* internal events should be handled by rose directly */ + assert(ri->type == EXTERNAL_CALLBACK); + + DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " + "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch, + ri->offsetAdjust); + + if (unlikely(can_stop_matching(scratch))) { + DEBUG_PRINTF("pre broken - halting\n"); + return MO_HALT_MATCHING; + } + + if (!is_simple && ri->hasBounds) { + assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); + if (to_offset < ri->minOffset || to_offset > ri->maxOffset) { + DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", + to_offset, ri->minOffset, ri->maxOffset); + return MO_CONTINUE_MATCHING; + } + } + + int halt = 0; + + if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { + DEBUG_PRINTF("ate exhausted match\n"); + goto exit; + } + + u64a offset = to_offset; + + to_offset += ri->offsetAdjust; + assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); + + if (!is_simple && ri->minLength) { + if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) { + return MO_CONTINUE_MATCHING; + } + if (ri->quashSom) { + from_offset = 0; + } + } + + DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", + from_offset, to_offset, ri->onmatch, ci->userContext); + +#ifndef RELEASE_BUILD + if (ri->offsetAdjust != 0) { + // alert testing tools that we've got adjusted matches + flags |= HS_MATCH_FLAG_ADJUSTED; + } +#endif + + enum DedupeResult dedupe_rv = + dedupeCatchupSom(rose, ri, scratch, offset, from_offset, to_offset); + switch (dedupe_rv) { + case DEDUPE_HALT: + halt = 1; + goto exit; + case DEDUPE_SKIP: + halt = 0; + goto exit; + case DEDUPE_CONTINUE: + break; + } + + halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, + flags, ci->userContext); + + if (!is_simple) { + markAsMatched(ci->exhaustionVector, ri->ekey); + } + +exit: + if (halt) { + DEBUG_PRINTF("callback requested to terminate matches\n"); + ci->broken = BROKEN_FROM_USER; + return MO_HALT_MATCHING; + } + + return MO_CONTINUE_MATCHING; +} + +/** + * \brief Deliver the given SOM report to the user callback. + * + * Assumes all preconditions (bounds, exhaustion etc) have been checked and + * that dedupe catchup has been done. + */ +static really_inline +int roseDeliverSomReport(u64a from_offset, u64a to_offset, ReportID id, + struct hs_scratch *scratch, char is_exhaustible) { + assert(id != MO_INVALID_IDX); // Should never get an invalid ID. 
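
The isExhausted()/markAsMatched() pair relied on throughout this file is
conceptually a bitvector indexed by exhaustion key: once a key's bit is set,
later matches carrying that key are suppressed. A sketch of the assumed
semantics (the real definitions are in util/exhaust.h and may differ in
layout):

    /* One bit per exhaustion key; sketch only. */
    static int isExhaustedSketch(const char *evec, unsigned int ekey) {
        return ((unsigned char)evec[ekey / 8] >> (ekey % 8)) & 1;
    }

    static void markAsMatchedSketch(char *evec, unsigned int ekey) {
        evec[ekey / 8] |= (char)(1U << (ekey % 8));
    }
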
+ assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + u32 flags = 0; + + struct core_info *ci = &scratch->core_info; + const struct RoseEngine *rose = ci->rose; + const struct internal_report *ri = getInternalReport(rose, id); + + assert(isExternalReport(ri)); /* only external reports should reach here */ + + DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " + "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch, + ri->offsetAdjust); + + assert(!can_stop_matching(scratch)); + assert(!ri->hasBounds || + (to_offset >= ri->minOffset && to_offset <= ri->maxOffset)); + assert(ri->ekey == INVALID_EKEY || + !isExhausted(ci->exhaustionVector, ri->ekey)); + + to_offset += ri->offsetAdjust; + assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); + + assert(!ri->minLength || + satisfiesMinLength(ri->minLength, from_offset, to_offset)); + assert(!ri->quashSom || from_offset == 0); + + DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", + from_offset, to_offset, ri->onmatch, ci->userContext); + +#ifndef RELEASE_BUILD + if (ri->offsetAdjust != 0) { + // alert testing tools that we've got adjusted matches + flags |= HS_MATCH_FLAG_ADJUSTED; + } +#endif + + int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, + to_offset, flags, ci->userContext); + + if (halt) { + DEBUG_PRINTF("callback requested to terminate matches\n"); + ci->broken = BROKEN_FROM_USER; + return MO_HALT_MATCHING; + } + + if (is_exhaustible) { + assert(ri->ekey != INVALID_EKEY); + markAsMatched(ci->exhaustionVector, ri->ekey); + return MO_CONTINUE_MATCHING; + } else { + return ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } +} + +#endif // REPORT_H diff --git a/src/rose/eod.c b/src/rose/eod.c index 7bbf8faf1..ade45727b 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -47,9 +47,9 @@ void initContext(const struct RoseEngine *t, char *state, u64a offset, tctxt->cb = callback; tctxt->cb_som = som_callback; tctxt->lastMatchOffset = 0; - tctxt->minMatchOffset = 0; - tctxt->minNonMpvMatchOffset = 0; - tctxt->next_mpv_offset = 0; + tctxt->minMatchOffset = offset; + tctxt->minNonMpvMatchOffset = offset; + tctxt->next_mpv_offset = offset; tctxt->curr_anchored_loc = MMB_INVALID; tctxt->curr_row_offset = 0; @@ -146,14 +146,16 @@ int eodNfaSomCallback(u64a from_offset, u64a to_offset, ReportID report, /** * \brief Check for (and deliver) reports from active output-exposed (suffix * or outfix) NFAs. + * + * \return MO_HALT_MATCHING if the user instructs us to stop. 
*/ static rose_inline -void roseCheckNfaEod(const struct RoseEngine *t, char *state, +int roseCheckNfaEod(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, u64a offset, const char is_streaming) { if (!t->eodNfaIterOffset) { DEBUG_PRINTF("no engines that report at EOD\n"); - return; + return MO_CONTINUE_MATCHING; } /* data, len is used for state decompress, should be full available data */ @@ -194,9 +196,11 @@ void roseCheckNfaEod(const struct RoseEngine *t, char *state, eodNfaSomCallback, scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("user instructed us to stop\n"); - return; + return MO_HALT_MATCHING; } } + + return MO_CONTINUE_MATCHING; } static rose_inline @@ -283,7 +287,10 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, return; } - roseCheckNfaEod(t, state, scratch, offset, is_streaming); + if (roseCheckNfaEod(t, state, scratch, offset, is_streaming) == + MO_HALT_MATCHING) { + return; + } if (!t->eodIterProgramOffset && !t->ematcherOffset) { DEBUG_PRINTF("no eod accepts\n"); @@ -291,8 +298,7 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, } // Handle pending EOD reports. - int itrv = roseEodRunIterator(t, offset, scratch); - if (itrv == MO_HALT_MATCHING) { + if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) { return; } @@ -303,15 +309,17 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, mmbit_clear(getRoleState(state), t->rolesWithStateCount); mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount); - hwlmcb_rv_t rv = roseEodRunMatcher(t, offset, scratch, is_streaming); - if (rv == HWLM_TERMINATE_MATCHING) { + if (roseEodRunMatcher(t, offset, scratch, is_streaming) == + HWLM_TERMINATE_MATCHING) { return; } cleanupAfterEodMatcher(t, state, offset, scratch); // Fire any new EOD reports. - roseEodRunIterator(t, offset, scratch); + if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) { + return; + } roseCheckEodSuffixes(t, state, offset, scratch); } diff --git a/src/rose/match.c b/src/rose/match.c index 6397b90ee..a91c03659 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -253,33 +253,53 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, return HWLM_CONTINUE_MATCHING; } +/* handles the firing of external matches */ +static rose_inline +hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, ReportID id, u64a end, + struct hs_scratch *scratch) { + struct RoseContext *tctxt = &scratch->tctxt; + + assert(end == tctxt->minMatchOffset); + DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end); + updateLastMatchOffset(tctxt, end); + + int cb_rv = tctxt->cb(end, id, scratch); + if (cb_rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + return HWLM_CONTINUE_MATCHING; + } + + return roseHaltIfExhausted(t, scratch); +} + /* handles catchup, som, cb, etc */ static really_inline hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, char *state, struct RoseContext *tctxt, ReportID id, u64a offset, char in_anchored) { - const struct internal_report *ri = getInternalReport(t, id); + struct hs_scratch *scratch = tctxtToScratch(tctxt); - if (ri) { - // Mildly cheesy performance hack: if this report is already exhausted, - // we can quash the match here. 
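
The roseHandleReport() rewrite shown here hoists catchup out of the
individual branches: engines are brought level with the match offset once,
up front, and only then is the report dispatched by type. In outline, with
the helpers stubbed (the real dispatch also covers chain reports):

    typedef unsigned long long u64a;

    enum rv { RV_CONTINUE, RV_TERMINATE };

    static enum rv catchUpTo(u64a end) { (void)end; return RV_CONTINUE; }
    static enum rv fireSom(u64a end) { (void)end; return RV_CONTINUE; }
    static enum rv fireExternal(u64a end) { (void)end; return RV_CONTINUE; }

    /* one catchup for every report type, instead of one per branch */
    static enum rv handleReportSketch(u64a end, int is_som_report) {
        if (catchUpTo(end) == RV_TERMINATE) {
            return RV_TERMINATE;
        }
        return is_som_report ? fireSom(end) : fireExternal(end);
    }

This keeps reports flowing to the user callback in nondecreasing offset
order, whichever branch fires them.
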
- if (ri->ekey != INVALID_EKEY) { - const struct hs_scratch *scratch = tctxtToScratch(tctxt); - if (isExhausted(scratch->core_info.exhaustionVector, ri->ekey)) { - DEBUG_PRINTF("eating exhausted match (report %u, ekey %u)\n", - ri->onmatch, ri->ekey); - return HWLM_CONTINUE_MATCHING; - } - } + if (roseCatchUpTo(t, state, offset, scratch, in_anchored) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + const struct internal_report *ri = getInternalReport(t, id); + if (ri) { if (isInternalSomReport(ri)) { - return roseHandleSom(t, state, id, offset, tctxt, in_anchored); + roseHandleSom(t, scratch, id, offset); + return HWLM_CONTINUE_MATCHING; } else if (ri->type == INTERNAL_ROSE_CHAIN) { return roseCatchUpAndHandleChainMatch(t, state, id, offset, tctxt, in_anchored); } } - return roseHandleMatch(t, state, id, offset, tctxt, in_anchored); + + return roseHandleMatch(t, id, offset, scratch); } static really_inline diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index f76689f4a..1e1356e14 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -34,6 +34,7 @@ #include "infix.h" #include "match.h" #include "miracle.h" +#include "report.h" #include "rose.h" #include "rose_internal.h" #include "rose_program.h" @@ -566,29 +567,20 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, pushQueueSom(q, topEvent, loc, start); } -/* handles the firing of external matches */ static rose_inline -hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, char *state, - ReportID id, u64a end, struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - assert(end == tctxt->minMatchOffset); - DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end); - updateLastMatchOffset(tctxt, end); +hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, + ReportID id, u64a end, char is_exhaustible) { + assert(end == scratch->tctxt.minMatchOffset); + DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end); + updateLastMatchOffset(&scratch->tctxt, end); - int cb_rv = tctxt->cb(end, id, scratch); + int cb_rv = roseDeliverReport(end, id, scratch, is_exhaustible); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; } - if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return HWLM_CONTINUE_MATCHING; } @@ -613,76 +605,38 @@ hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0); } -static rose_inline -hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, char *state, u64a end, - struct RoseContext *tctxt, char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - // In SOM processing, we may be able to limit or entirely avoid catchup. 
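
Deleting roseSomCatchup() here shifts the catchup obligation to the caller:
the SOM handlers no longer catch up on their own, they assert that the
caller already has. Encoding that transferred precondition as an assertion
looks like this (sketch, with an illustrative context struct):

    #include <assert.h>

    typedef unsigned long long u64a;

    struct tctxt_sketch { u64a minMatchOffset; };

    /* Precondition (new in this patch): the caller has already caught
     * all engines up to 'end', so end == minMatchOffset must hold. */
    static void handleSomSketch(const struct tctxt_sketch *t, u64a end) {
        assert(end == t->minMatchOffset);
        (void)t; (void)end; /* silence warnings in NDEBUG builds */
        /* ... manipulate SOM slots directly, no callback round trip ... */
    }
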
- - DEBUG_PRINTF("entry\n"); - - if (end == tctxt->minMatchOffset) { - DEBUG_PRINTF("already caught up\n"); - return HWLM_CONTINUE_MATCHING; - } - - DEBUG_PRINTF("catching up all NFAs\n"); - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - updateMinMatchOffset(tctxt, end); - return HWLM_CONTINUE_MATCHING; -} - static really_inline -hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, char *state, ReportID id, - u64a end, struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - +void roseHandleSom(const struct RoseEngine *t, struct hs_scratch *scratch, + ReportID id, u64a end) { DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end, - tctxt->minMatchOffset); + scratch->tctxt.minMatchOffset); // Reach into reports and handle internal reports that just manipulate SOM // slots ourselves, rather than going through the callback. - if (roseSomCatchup(t, state, end, tctxt, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } + assert(end == scratch->tctxt.minMatchOffset); + DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end); + updateLastMatchOffset(&scratch->tctxt, end); const struct internal_report *ri = getInternalReport(t, id); handleSomInternal(scratch, ri, end); - - return HWLM_CONTINUE_MATCHING; } static rose_inline -hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state, - ReportID id, u64a start, u64a end, - struct RoseContext *tctxt, char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id, - start, end); - DEBUG_PRINTF(" last match %llu\n", tctxt->lastMatchOffset); - assert(end == tctxt->minMatchOffset); - - updateLastMatchOffset(tctxt, end); - int cb_rv = tctxt->cb_som(start, end, id, scratch); +hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, + struct hs_scratch *scratch, ReportID id, u64a start, + u64a end, char is_exhaustible) { + assert(end == scratch->tctxt.minMatchOffset); + DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end); + updateLastMatchOffset(&scratch->tctxt, end); + + int cb_rv = roseDeliverSomReport(start, end, id, scratch, is_exhaustible); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; } - if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return HWLM_CONTINUE_MATCHING; } @@ -690,23 +644,19 @@ hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state, } static rose_inline -hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, char *state, - ReportID id, u64a start, u64a end, - struct RoseContext *tctxt, char in_anchored) { +void roseHandleSomSom(const struct RoseEngine *t, ReportID id, u64a start, + u64a end, struct hs_scratch *scratch) { DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n", - id, start, end, tctxt->minMatchOffset); + id, start, end, scratch->tctxt.minMatchOffset); // Reach into reports and handle internal reports that just manipulate SOM // slots ourselves, rather than going through the callback. 
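
The is_exhaustible flag threaded through roseReport()/roseReportSom() above
is always a literal constant at its call sites (the REPORT vs REPORT_EXHAUST
instruction cases below), so after inlining each variant loses the branch
entirely. The shape of that idiom, with the delivery body stubbed:

    #define MO_CONTINUE_MATCHING 1 /* assumed value */
    #define ROSE_CONTINUE_MATCHING_NO_EXHAUST 2

    /* inline worker: 'is_exhaustible' is a compile-time constant at
     * every call site, so the branch below folds away */
    static inline int deliverSketch(char is_exhaustible) {
        /* ... fire the user callback here ... */
        if (is_exhaustible) {
            /* ... mark the exhaustion key ... */
            return MO_CONTINUE_MATCHING;
        }
        return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
    }

    static int opReport(void)        { return deliverSketch(0); }
    static int opReportExhaust(void) { return deliverSketch(1); }
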
- if (roseSomCatchup(t, state, end, tctxt, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } + assert(end == scratch->tctxt.minMatchOffset); + updateLastMatchOffset(&scratch->tctxt, end); const struct internal_report *ri = getInternalReport(t, id); - setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end); - return HWLM_CONTINUE_MATCHING; + setSomFromSomAware(scratch, ri, start, end); } static really_inline @@ -848,14 +798,11 @@ u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, } static rose_inline -char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { - assert(max_bound <= ROSE_BOUND_INF); +char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { + DEBUG_PRINTF("check offset=%llu against bounds [%llu,%llu]\n", end, + min_bound, max_bound); assert(min_bound <= max_bound); - - if (end < min_bound) { - return 0; - } - return max_bound == ROSE_BOUND_INF || end <= max_bound; + return end >= min_bound && end <= max_bound; } @@ -956,9 +903,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_BOUNDS) { - if (!in_anchored && - !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) { - DEBUG_PRINTF("failed root bounds check\n"); + if (!roseCheckBounds(end, ri->min_bound, ri->max_bound)) { + DEBUG_PRINTF("failed bounds check\n"); assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; @@ -1003,6 +949,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP) { + if (roseCatchUpTo(t, scratch->core_info.state, end, scratch, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { assert(ri->distance <= end); som = end - ri->distance; @@ -1016,6 +970,20 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_FROM_REPORT) { + const struct internal_report *ir = + getInternalReport(t, ri->report); + som = handleSomExternal(scratch, ir, end); + DEBUG_PRINTF("som from report %u is %llu\n", ri->report, som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ZERO) { + DEBUG_PRINTF("setting SOM to zero\n"); + som = 0; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(TRIGGER_INFIX) { roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel, tctxt); @@ -1033,13 +1001,40 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT) { - if (roseHandleMatch(t, scratch->core_info.state, - ri->report, end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { + PROGRAM_CASE(DEDUPE) { + const struct internal_report *ir = + getInternalReport(t, ri->report); + const char do_som = t->hasSom; // FIXME: constant propagate + enum DedupeResult rv = dedupeCatchup( + t, ir, scratch, end, som, end + ir->offsetAdjust, do_som); + switch (rv) { + case DEDUPE_HALT: return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + case DEDUPE_CONTINUE: + break; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_SOM) { + const struct internal_report *ir = + getInternalReport(t, ri->report); + enum DedupeResult rv = dedupeCatchupSom( + t, ir, scratch, end, som, end + ir->offsetAdjust); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += 
ri->fail_jump; + continue; + case DEDUPE_CONTINUE: + break; } - work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -1053,18 +1048,32 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_EOD) { - if (tctxt->cb(end, ri->report, scratch) == MO_HALT_MATCHING) { + PROGRAM_CASE(REPORT_SOM_INT) { + roseHandleSom(t, scratch, ri->report, end); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_AWARE) { + roseHandleSomSom(t, ri->report, som, end, scratch); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + const char is_exhaustible = 0; + if (roseReport(t, scratch, ri->report, end, is_exhaustible) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_SOM_INT) { - if (roseHandleSom(t, scratch->core_info.state, ri->report, - end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { + PROGRAM_CASE(REPORT_EXHAUST) { + const char is_exhaustible = 1; + if (roseReport(t, scratch, ri->report, end, is_exhaustible) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1072,25 +1081,57 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM) { - if (roseHandleSomSom(t, scratch->core_info.state, - ri->report, som, end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { + const char is_exhaustible = 0; + if (roseReportSom(t, scratch, ri->report, som, end, + is_exhaustible) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_SOM_KNOWN) { - if (roseHandleSomMatch(t, scratch->core_info.state, ri->report, - som, end, tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { + PROGRAM_CASE(REPORT_SOM_EXHAUST) { + const char is_exhaustible = 1; + if (roseReportSom(t, scratch, ri->report, som, end, + is_exhaustible) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_EXHAUSTED) { + DEBUG_PRINTF("check ekey %u\n", ri->ekey); + assert(ri->ekey != INVALID_EKEY); + assert(ri->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(evec, ri->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ri->ekey); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MIN_LENGTH) { + DEBUG_PRINTF("check min length %llu (adj %d)\n", ri->min_length, + ri->end_adj); + assert(ri->min_length > 0); + assert(ri->end_adj == 0 || ri->end_adj == -1); + assert(som == HS_OFFSET_PAST_HORIZON || som <= end); + if (som != HS_OFFSET_PAST_HORIZON && + ((end + ri->end_adj) - som < ri->min_length)) { + DEBUG_PRINTF("failed check, match len %llu\n", + (u64a)((end + ri->end_adj) - som)); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SET_STATE) { DEBUG_PRINTF("set state index %u\n", ri->index); mmbit_set(getRoleState(scratch->core_info.state), diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a042eb2c5..c7c0891a8 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -141,25 +141,42 @@ struct left_build_info { vector lookaround; // alternative implementation to the NFA }; +/** + * \brief Possible jump targets for roles that perform 
checks. + * + * Fixed up into offsets before the program is written to bytecode. + */ +enum class JumpTarget { + NO_JUMP, //!< Instruction does not jump. + PROGRAM_END, //!< Jump to end of program. + NEXT_BLOCK, //!< Jump to start of next block (sparse iter check, etc). + FIXUP_DONE, //!< Target fixup already applied. +}; + /** \brief Role instruction model used at compile time. */ class RoseInstruction { public: - RoseInstruction() { - memset(&u, 0, sizeof(u)); - u.end.code = ROSE_INSTR_END; - } - - explicit RoseInstruction(enum RoseInstructionCode c) { + RoseInstruction(enum RoseInstructionCode c, JumpTarget j) : target(j) { memset(&u, 0, sizeof(u)); u.end.code = c; } + explicit RoseInstruction(enum RoseInstructionCode c) + : RoseInstruction(c, JumpTarget::NO_JUMP) {} + bool operator<(const RoseInstruction &a) const { + if (code() != a.code()) { + return code() < a.code(); + } + if (target != a.target) { + return target < a.target; + } return memcmp(&u, &a.u, sizeof(u)) < 0; } bool operator==(const RoseInstruction &a) const { - return memcmp(&u, &a.u, sizeof(u)) == 0; + return code() == a.code() && target == a.target && + memcmp(&u, &a.u, sizeof(u)) == 0; } enum RoseInstructionCode code() const { @@ -180,16 +197,24 @@ class RoseInstruction { case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; + case ROSE_INSTR_CATCH_UP: return &u.catchUp; case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; + case ROSE_INSTR_SOM_FROM_REPORT: return &u.somFromReport; + case ROSE_INSTR_SOM_ZERO: return &u.somZero; case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; case ROSE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; - case ROSE_INSTR_REPORT: return &u.report; + case ROSE_INSTR_DEDUPE: return &u.dedupe; + case ROSE_INSTR_DEDUPE_SOM: return &u.dedupeSom; case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain; - case ROSE_INSTR_REPORT_EOD: return &u.reportEod; case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; + case ROSE_INSTR_REPORT_SOM_AWARE: return &u.reportSom; + case ROSE_INSTR_REPORT: return &u.report; + case ROSE_INSTR_REPORT_EXHAUST: return &u.reportExhaust; case ROSE_INSTR_REPORT_SOM: return &u.reportSom; - case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; + case ROSE_INSTR_REPORT_SOM_EXHAUST: return &u.reportSomExhaust; + case ROSE_INSTR_CHECK_EXHAUSTED: return &u.checkExhausted; + case ROSE_INSTR_CHECK_MIN_LENGTH: return &u.checkMinLength; case ROSE_INSTR_SET_STATE: return &u.setState; case ROSE_INSTR_SET_GROUPS: return &u.setGroups; case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups; @@ -214,16 +239,24 @@ class RoseInstruction { case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); + case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp); case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); + case ROSE_INSTR_SOM_FROM_REPORT: return sizeof(u.somFromReport); + case ROSE_INSTR_SOM_ZERO: return sizeof(u.somZero); case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); case ROSE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); - case ROSE_INSTR_REPORT: return sizeof(u.report); + case ROSE_INSTR_DEDUPE: return sizeof(u.dedupe); + case ROSE_INSTR_DEDUPE_SOM: return sizeof(u.dedupeSom); case ROSE_INSTR_REPORT_CHAIN: return 
sizeof(u.reportChain); - case ROSE_INSTR_REPORT_EOD: return sizeof(u.reportEod); case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); + case ROSE_INSTR_REPORT_SOM_AWARE: return sizeof(u.reportSom); + case ROSE_INSTR_REPORT: return sizeof(u.report); + case ROSE_INSTR_REPORT_EXHAUST: return sizeof(u.reportExhaust); case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); - case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); + case ROSE_INSTR_REPORT_SOM_EXHAUST: return sizeof(u.reportSomExhaust); + case ROSE_INSTR_CHECK_EXHAUSTED: return sizeof(u.checkExhausted); + case ROSE_INSTR_CHECK_MIN_LENGTH: return sizeof(u.checkMinLength); case ROSE_INSTR_SET_STATE: return sizeof(u.setState); case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups); @@ -232,6 +265,7 @@ class RoseInstruction { case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); case ROSE_INSTR_END: return sizeof(u.end); } + assert(0); return 0; } @@ -246,16 +280,24 @@ class RoseInstruction { ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix; ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; ROSE_STRUCT_PUSH_DELAYED pushDelayed; + ROSE_STRUCT_CATCH_UP catchUp; ROSE_STRUCT_SOM_ADJUST somAdjust; ROSE_STRUCT_SOM_LEFTFIX somLeftfix; + ROSE_STRUCT_SOM_FROM_REPORT somFromReport; + ROSE_STRUCT_SOM_ZERO somZero; ROSE_STRUCT_TRIGGER_INFIX triggerInfix; ROSE_STRUCT_TRIGGER_SUFFIX triggerSuffix; - ROSE_STRUCT_REPORT report; + ROSE_STRUCT_DEDUPE dedupe; + ROSE_STRUCT_DEDUPE_SOM dedupeSom; ROSE_STRUCT_REPORT_CHAIN reportChain; - ROSE_STRUCT_REPORT_EOD reportEod; ROSE_STRUCT_REPORT_SOM_INT reportSomInt; + ROSE_STRUCT_REPORT_SOM_AWARE reportSomAware; + ROSE_STRUCT_REPORT report; + ROSE_STRUCT_REPORT_EXHAUST reportExhaust; ROSE_STRUCT_REPORT_SOM reportSom; - ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; + ROSE_STRUCT_REPORT_SOM_EXHAUST reportSomExhaust; + ROSE_STRUCT_CHECK_EXHAUSTED checkExhausted; + ROSE_STRUCT_CHECK_MIN_LENGTH checkMinLength; ROSE_STRUCT_SET_STATE setState; ROSE_STRUCT_SET_GROUPS setGroups; ROSE_STRUCT_SQUASH_GROUPS squashGroups; @@ -264,11 +306,15 @@ class RoseInstruction { ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; ROSE_STRUCT_END end; } u; + + JumpTarget target; }; static size_t hash_value(const RoseInstruction &ri) { size_t val = 0; + boost::hash_combine(val, ri.code()); + boost::hash_combine(val, ri.target); const char *bytes = (const char *)ri.get(); const size_t len = ri.length(); for (size_t i = 0; i < len; i++) { @@ -2619,61 +2665,100 @@ flattenProgram(const vector> &programs) { vector out; vector offsets; // offset of each instruction (bytes) - vector targets; // jump target for each instruction + vector blocks; // track which block we're in + vector block_offsets; // start offsets for each block - DEBUG_PRINTF("%zu programs\n", programs.size()); + DEBUG_PRINTF("%zu program blocks\n", programs.size()); size_t curr_offset = 0; for (const auto &program : programs) { - DEBUG_PRINTF("program with %zu instructions\n", program.size()); + DEBUG_PRINTF("block with %zu instructions\n", program.size()); + block_offsets.push_back(curr_offset); for (const auto &ri : program) { + assert(ri.code() != ROSE_INSTR_END); out.push_back(ri); offsets.push_back(curr_offset); + blocks.push_back(block_offsets.size() - 1); curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } - for (size_t i = 0; i < program.size(); i++) { - targets.push_back(curr_offset); - } } - // Add an END instruction. + // Add a final END instruction, which is its own block. 
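+    // Making END its own block lets both jump flavours resolve cleanly:
+    // PROGRAM_END targets use its offset (offsets.back()), while a
+    // NEXT_BLOCK jump from the final real block reaches it via block_offsets.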
out.emplace_back(ROSE_INSTR_END); + block_offsets.push_back(curr_offset); offsets.push_back(curr_offset); - targets.push_back(curr_offset); - assert(targets.size() == out.size()); assert(offsets.size() == out.size()); for (size_t i = 0; i < out.size(); i++) { auto &ri = out[i]; + + u32 jump_target = 0; + switch (ri.target) { + case JumpTarget::NO_JUMP: + case JumpTarget::FIXUP_DONE: + continue; // Next instruction. + case JumpTarget::PROGRAM_END: + assert(i != out.size() - 1); + jump_target = offsets.back(); + break; + case JumpTarget::NEXT_BLOCK: + assert(blocks[i] + 1 < block_offsets.size()); + jump_target = block_offsets[blocks[i] + 1]; + break; + } + + // We currently always make progress and never jump backwards. + assert(jump_target > offsets[i]); + assert(jump_target <= offsets.back()); + u32 jump_val = jump_target - offsets[i]; + switch (ri.code()) { case ROSE_INSTR_ANCHORED_DELAY: - assert(targets[i] > offsets[i]); // jumps always progress - ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; + ri.u.anchoredDelay.done_jump = jump_val; break; case ROSE_INSTR_CHECK_ONLY_EOD: - assert(targets[i] > offsets[i]); - ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; + ri.u.checkOnlyEod.fail_jump = jump_val; break; case ROSE_INSTR_CHECK_BOUNDS: - assert(targets[i] > offsets[i]); - ri.u.checkBounds.fail_jump = targets[i] - offsets[i]; + ri.u.checkBounds.fail_jump = jump_val; break; case ROSE_INSTR_CHECK_NOT_HANDLED: - assert(targets[i] > offsets[i]); - ri.u.checkNotHandled.fail_jump = targets[i] - offsets[i]; + ri.u.checkNotHandled.fail_jump = jump_val; break; case ROSE_INSTR_CHECK_LOOKAROUND: - assert(targets[i] > offsets[i]); - ri.u.checkLookaround.fail_jump = targets[i] - offsets[i]; + ri.u.checkLookaround.fail_jump = jump_val; break; case ROSE_INSTR_CHECK_LEFTFIX: - assert(targets[i] > offsets[i]); - ri.u.checkLeftfix.fail_jump = targets[i] - offsets[i]; + ri.u.checkLeftfix.fail_jump = jump_val; + break; + case ROSE_INSTR_DEDUPE: + ri.u.dedupe.fail_jump = jump_val; + break; + case ROSE_INSTR_DEDUPE_SOM: + ri.u.dedupeSom.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_EXHAUSTED: + ri.u.checkExhausted.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_MIN_LENGTH: + ri.u.checkMinLength.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_STATE: + ri.u.checkState.fail_jump = jump_val; + break; + case ROSE_INSTR_SPARSE_ITER_BEGIN: + ri.u.sparseIterBegin.fail_jump = jump_val; + break; + case ROSE_INSTR_SPARSE_ITER_NEXT: + ri.u.sparseIterNext.fail_jump = jump_val; break; default: + assert(0); // Unhandled opcode? break; } + + ri.target = JumpTarget::FIXUP_DONE; } return out; @@ -2689,6 +2774,13 @@ u32 writeProgram(build_context &bc, const vector &program) { assert(program.back().code() == ROSE_INSTR_END); assert(program.size() >= 1); + // This program must have been flattened; i.e. all check instructions must + // have their jump offsets set. 
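+    // (flattenProgram leaves non-jump instructions as NO_JUMP and marks every
+    // resolved jump as FIXUP_DONE; anything else here means we were handed an
+    // unflattened program.)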
+ assert(all_of(begin(program), end(program), [](const RoseInstruction &ri) { + return ri.target == JumpTarget::NO_JUMP || + ri.target == JumpTarget::FIXUP_DONE; + })); + auto it = bc.program_cache.find(program); if (it != end(bc.program_cache)) { DEBUG_PRINTF("reusing cached program at %u\n", it->second); @@ -2877,7 +2969,8 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, } u32 look_count = verify_u32(look.size()); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND, + JumpTarget::NEXT_BLOCK); ri.u.checkLookaround.index = look_idx; ri.u.checkLookaround.count = look_count; program.push_back(ri); @@ -2898,7 +2991,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!build.cc.streaming || build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX, JumpTarget::NEXT_BLOCK); ri.u.checkLeftfix.queue = lni.queue; ri.u.checkLeftfix.lag = build.g[v].left.lag; ri.u.checkLeftfix.report = build.g[v].left.leftfix_report; @@ -2906,7 +2999,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, } static -void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc, +void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, RoseVertex v, vector &program) { // Only relevant for roles that can be triggered by the anchored table. if (!build.isAnchored(v)) { @@ -2919,11 +3012,150 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc, return; } - auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY); + auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY, + JumpTarget::NEXT_BLOCK); ri.u.anchoredDelay.groups = build.g[v].groups; program.push_back(ri); } +static +void makeDedupe(const ReportID id, vector &report_block) { + auto ri = RoseInstruction(ROSE_INSTR_DEDUPE, JumpTarget::NEXT_BLOCK); + ri.u.dedupe.report = id; + report_block.push_back(move(ri)); +} + +static +void makeDedupeSom(const ReportID id, vector &report_block) { + auto ri = RoseInstruction(ROSE_INSTR_DEDUPE_SOM, JumpTarget::NEXT_BLOCK); + ri.u.dedupeSom.report = id; + report_block.push_back(move(ri)); +} + +static +void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, + vector &program) { + assert(id < build.rm.numReports()); + const Report &report = build.rm.getReport(id); + + vector report_block; + + // If this report has an exhaustion key, we can check it in the program + // rather than waiting until we're in the callback adaptor. + if (report.ekey != INVALID_EKEY) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_EXHAUSTED, + JumpTarget::NEXT_BLOCK); + ri.u.checkExhausted.ekey = report.ekey; + report_block.push_back(move(ri)); + } + + // Similarly, we can handle min/max offset checks. + if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, + JumpTarget::NEXT_BLOCK); + ri.u.checkBounds.min_bound = report.minOffset; + ri.u.checkBounds.max_bound = report.maxOffset; + report_block.push_back(move(ri)); + } + + // Catch up -- everything except the INTERNAL_ROSE_CHAIN report needs this. + // TODO: this could be floated in front of all the reports and only done + // once. + if (report.type != INTERNAL_ROSE_CHAIN) { + program.emplace_back(ROSE_INSTR_CATCH_UP); + } + + // External SOM reports need their SOM value calculated. 
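+    // (At runtime, SOM_FROM_REPORT computes som via handleSomExternal()
+    // before the dedupe and report instructions below consume it.)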
+ if (isExternalSomReport(report)) { + auto ri = RoseInstruction(ROSE_INSTR_SOM_FROM_REPORT); + ri.u.somFromReport.report = id; + report_block.push_back(move(ri)); + } + + // Min length constraint. + if (report.minLength > 0) { + assert(build.hasSom); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_MIN_LENGTH, + JumpTarget::NEXT_BLOCK); + ri.u.checkMinLength.end_adj = report.offsetAdjust; + ri.u.checkMinLength.min_length = report.minLength; + report_block.push_back(move(ri)); + } + + if (report.quashSom) { + report_block.emplace_back(ROSE_INSTR_SOM_ZERO); + } + + switch (report.type) { + case EXTERNAL_CALLBACK: + if (!has_som) { + makeDedupe(id, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.emplace_back(ROSE_INSTR_REPORT); + report_block.back().u.report.report = id; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT_EXHAUST); + report_block.back().u.reportExhaust.report = id; + } + } else { // has_som + makeDedupeSom(id, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM); + report_block.back().u.reportSom.report = id; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); + report_block.back().u.reportSomExhaust.report = id; + } + } + break; + case INTERNAL_SOM_LOC_SET: + case INTERNAL_SOM_LOC_SET_IF_UNSET: + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + case INTERNAL_SOM_LOC_COPY: + case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: + case INTERNAL_SOM_LOC_MAKE_WRITABLE: + case INTERNAL_SOM_LOC_SET_FROM: + case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + if (has_som) { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM_AWARE); + report_block.back().u.reportSomAware.report = id; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM_INT); + report_block.back().u.reportSomInt.report = id; + } + break; + case INTERNAL_ROSE_CHAIN: + report_block.emplace_back(ROSE_INSTR_REPORT_CHAIN); + report_block.back().u.reportChain.report = id; + break; + case EXTERNAL_CALLBACK_SOM_REL: + case EXTERNAL_CALLBACK_SOM_STORED: + case EXTERNAL_CALLBACK_SOM_ABS: + case EXTERNAL_CALLBACK_SOM_REV_NFA: + makeDedupeSom(id, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM); + report_block.back().u.reportSom.report = id; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); + report_block.back().u.reportSomExhaust.report = id; + } + break; + default: + assert(0); + throw CompileError("Unable to generate bytecode."); + } + + assert(!report_block.empty()); + report_block = flattenProgram({report_block}); + assert(report_block.back().code() == ROSE_INSTR_END); + report_block.pop_back(); + insert(&program, program.end(), report_block); +} + static void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, vector &program) { @@ -2947,25 +3179,8 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, has_som = true; } - // Write program instructions for reports. for (ReportID id : g[v].reports) { - assert(id < build.rm.numReports()); - const Report &ir = build.rm.getReport(id); - if (isInternalSomReport(ir)) { - auto ri = RoseInstruction(has_som ? 
ROSE_INSTR_REPORT_SOM - : ROSE_INSTR_REPORT_SOM_INT); - ri.u.report.report = id; - program.push_back(ri); - } else if (ir.type == INTERNAL_ROSE_CHAIN) { - auto ri = RoseInstruction(ROSE_INSTR_REPORT_CHAIN); - ri.u.report.report = id; - program.push_back(ri); - } else { - auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM_KNOWN - : ROSE_INSTR_REPORT); - ri.u.report.report = id; - program.push_back(ri); - } + makeReport(build, id, has_som, program); } } @@ -3093,10 +3308,10 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, // Use the minimum literal length. u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); - u32 min_bound = g[e].minBound + lit_length; - u32 max_bound = g[e].maxBound == ROSE_BOUND_INF - ? ROSE_BOUND_INF - : g[e].maxBound + lit_length; + u64a min_bound = g[e].minBound + lit_length; + u64a max_bound = g[e].maxBound == ROSE_BOUND_INF + ? ROSE_BOUND_INF + : g[e].maxBound + lit_length; if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { assert(g[u].max_offset != ROSE_BOUND_INF); @@ -3110,7 +3325,13 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, assert(max_bound <= ROSE_BOUND_INF); assert(min_bound <= max_bound); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS); + // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET + // (max value of a u64a) to represent ROSE_BOUND_INF. + if (max_bound == ROSE_BOUND_INF) { + max_bound = MAX_OFFSET; + } + + auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, JumpTarget::NEXT_BLOCK); ri.u.checkBounds.min_bound = min_bound; ri.u.checkBounds.max_bound = max_bound; @@ -3138,7 +3359,8 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, if (onlyAtEod(build, v)) { DEBUG_PRINTF("only at eod\n"); - program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD)); + program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD, + JumpTarget::NEXT_BLOCK)); } if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { @@ -3287,7 +3509,8 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, static void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, vector &program) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED, + JumpTarget::NEXT_BLOCK); u32 handled_key; if (contains(bc.handledKeys, v)) { @@ -3328,48 +3551,42 @@ vector makePredProgram(RoseBuildImpl &build, build_context &bc, static u32 addPredBlocksSingle( map>> &predProgramLists, - u32 curr_offset, vector &program) { - assert(predProgramLists.size() == 1); + vector &program) { - u32 pred_state = predProgramLists.begin()->first; - auto subprog = flattenProgram(predProgramLists.begin()->second); + vector> prog_blocks; - // Check our pred state. - auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE); - ri.u.checkState.index = pred_state; - program.push_back(ri); - curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + for (const auto &m : predProgramLists) { + const u32 &pred_state = m.first; + auto subprog = flattenProgram(m.second); - // Add subprogram. - for (const auto &ri : subprog) { - program.push_back(ri); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + // Check our pred state. 
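+        // Each block is guarded by its own CHECK_STATE: if this pred's state
+        // bit is not on, we jump straight to the next block's check.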
+ auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE, + JumpTarget::NEXT_BLOCK); + ri.u.checkState.index = pred_state; + subprog.insert(begin(subprog), ri); + assert(subprog.back().code() == ROSE_INSTR_END); + subprog.pop_back(); + prog_blocks.push_back(move(subprog)); } - const u32 end_offset = - curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + auto prog = flattenProgram(prog_blocks); + program.insert(end(program), begin(prog), end(prog)); + return 0; // No iterator. +} - // Fix up the instruction operands. - curr_offset = 0; - for (size_t i = 0; i < program.size(); i++) { - auto &ri = program[i]; - switch (ri.code()) { - case ROSE_INSTR_CHECK_STATE: - ri.u.checkState.fail_jump = end_offset - curr_offset; - break; - default: - break; - } - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); +static +u32 programLength(const vector &program) { + u32 len = 0; + for (const auto &ri : program) { + len += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } - - return 0; // No iterator. + return len; } static u32 addPredBlocksMulti(build_context &bc, map>> &predProgramLists, - u32 curr_offset, vector &program) { + vector &program) { assert(!predProgramLists.empty()); // First, add the iterator itself. @@ -3386,10 +3603,12 @@ u32 addPredBlocksMulti(build_context &bc, // Construct our program, starting with the SPARSE_ITER_BEGIN // instruction, keeping track of the jump offset for each sub-program. + vector sparse_program; vector jump_table; - program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN)); - curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + sparse_program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN, + JumpTarget::PROGRAM_END)); + u32 curr_offset = programLength(program) + programLength(sparse_program); for (const auto &e : predProgramLists) { DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), @@ -3402,62 +3621,61 @@ u32 addPredBlocksMulti(build_context &bc, // with a SPARSE_ITER_NEXT. assert(!subprog.empty()); assert(subprog.back().code() == ROSE_INSTR_END); - subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT); + subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT, + JumpTarget::PROGRAM_END); } - for (const auto &ri : subprog) { - program.push_back(ri); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } + curr_offset += programLength(subprog); + insert(&sparse_program, end(sparse_program), subprog); } - const u32 end_offset = - curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + // Strip the END instruction from the last block. + assert(sparse_program.back().code() == ROSE_INSTR_END); + sparse_program.pop_back(); + + sparse_program = flattenProgram({sparse_program}); // Write the jump table into the bytecode. const u32 jump_table_offset = add_to_engine_blob(bc, begin(jump_table), end(jump_table)); - // Fix up the instruction operands. + // Write jump table and iterator offset into sparse iter instructions. 
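+    // (The fail_jump fields were already resolved by flattenProgram above;
+    // only the iterator-specific operands still need patching here.)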
auto keys_it = begin(keys); - curr_offset = 0; - for (size_t i = 0; i < program.size(); i++) { - auto &ri = program[i]; + for (auto &ri : sparse_program) { switch (ri.code()) { case ROSE_INSTR_SPARSE_ITER_BEGIN: ri.u.sparseIterBegin.iter_offset = iter_offset; ri.u.sparseIterBegin.jump_table = jump_table_offset; - ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset; break; case ROSE_INSTR_SPARSE_ITER_NEXT: ri.u.sparseIterNext.iter_offset = iter_offset; ri.u.sparseIterNext.jump_table = jump_table_offset; assert(keys_it != end(keys)); ri.u.sparseIterNext.state = *keys_it++; - ri.u.sparseIterNext.fail_jump = end_offset - curr_offset; break; default: break; } - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } + program.insert(end(program), begin(sparse_program), end(sparse_program)); + return iter_offset; } static u32 addPredBlocks(build_context &bc, map>> &predProgramLists, - u32 curr_offset, vector &program, + vector &program, bool force_sparse_iter) { const size_t num_preds = predProgramLists.size(); if (num_preds == 0) { program = flattenProgram({program}); return 0; // No iterator. } else if (!force_sparse_iter && num_preds == 1) { - return addPredBlocksSingle(predProgramLists, curr_offset, program); + return addPredBlocksSingle(predProgramLists, program); } else { - return addPredBlocksMulti(bc, predProgramLists, curr_offset, program); + return addPredBlocksMulti(bc, predProgramLists, program); } } @@ -3481,8 +3699,7 @@ pair makeSparseIterProgram(build_context &bc, // Add blocks to deal with non-root edges (triggered by sparse iterator or // mmbit_isset checks). This operation will flatten the program up to this // point. - u32 iter_offset = - addPredBlocks(bc, predProgramLists, curr_offset, program, false); + u32 iter_offset = addPredBlocks(bc, predProgramLists, program, false); // If we have a root program, replace the END instruction with it. Note // that the root program has already been flattened. @@ -3823,10 +4040,8 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, makeRoleCheckNotHandled(bc, v, program); } - for (const auto &report : g[v].reports) { - auto ri = RoseInstruction(ROSE_INSTR_REPORT_EOD); - ri.u.report.report = report; - program.push_back(ri); + for (const auto &id : g[v].reports) { + makeReport(build, id, false, program); } return program; @@ -3870,7 +4085,7 @@ pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { // Note: we force the use of a sparse iterator for the EOD program so we // can easily guard EOD execution at runtime. - u32 iter_offset = addPredBlocks(bc, predProgramLists, 0, program, true); + u32 iter_offset = addPredBlocks(bc, predProgramLists, program, true); assert(program.size() > 1); return {writeProgram(bc, program), iter_offset}; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 25ec7bae6..adf737264 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -41,6 +41,7 @@ #include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" #include "util/dump_charclass.h" +#include "util/internal_report.h" #include "util/multibit_internal.h" #include "util/multibit.h" @@ -152,6 +153,61 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, } } +static +vector sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) { + vector keys; + + if (num_bits == 0) { + return keys; + } + + vector bits(mmbit_size(num_bits), u8{0xff}); // All bits on. 
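+    // Iterating over an all-ones multibit visits exactly the keys the sparse
+    // iterator was compiled over, which is the set we want to dump.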
+ vector state(MAX_SPARSE_ITER_STATES); + + const u8 *b = bits.data(); + mmbit_sparse_state *s = state.data(); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s); + while (i != MMB_INVALID) { + keys.push_back(i); + i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s); + } + + return keys; +} + +static +void dumpJumpTable(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) { + auto *it = + (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset); + auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table); + + for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) { + os << " " << std::setw(4) << std::setfill(' ') << key << " : +" + << *jumps << endl; + ++jumps; + } +} + +static +void dumpReport(ofstream &os, const RoseEngine *t, ReportID report) { + const auto *ir = + (const internal_report *)loadFromByteCodeOffset(t, t->intReportOffset) + + report; + os << " type=" << u32{ir->type}; + os << ", onmatch=" << ir->onmatch; + if (ir->ekey != INVALID_EKEY) { + os << ", ekey=" << ir->ekey; + } + if (ir->dkey != MO_INVALID_IDX) { + os << ", dkey=" << ir->dkey; + } + + os << endl; +} + static string dumpStrMask(const u8 *mask, size_t len) { ostringstream oss; @@ -211,6 +267,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_BOUNDS) { + os << " min_bound " << ri->min_bound << endl; + os << " max_bound " << ri->max_bound << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_NOT_HANDLED) { os << " key " << ri->key << endl; os << " fail_jump +" << ri->fail_jump << endl; @@ -239,6 +302,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP) {} + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { os << " distance " << ri->distance << endl; } @@ -250,6 +316,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_FROM_REPORT) { + os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ZERO) {} + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(TRIGGER_INFIX) { os << " queue " << ri->queue << endl; os << " event " << ri->event << endl; @@ -263,33 +338,72 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT) { + PROGRAM_CASE(DEDUPE) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_CHAIN) { + PROGRAM_CASE(DEDUPE_SOM) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_EOD) { + PROGRAM_CASE(REPORT_CHAIN) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_INT) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_AWARE) { + os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EXHAUST) { + os << " report " << ri->report << endl; + 
dumpReport(os, t, ri->report); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_SOM_KNOWN) { + PROGRAM_CASE(REPORT_SOM_EXHAUST) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_EXHAUSTED) { + os << " ekey " << ri->ekey << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MIN_LENGTH) { + os << " end_adj " << ri->end_adj << endl; + os << " min_length " << ri->min_length << endl; + os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -319,6 +433,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(SPARSE_ITER_BEGIN) { os << " iter_offset " << ri->iter_offset << endl; os << " jump_table " << ri->jump_table << endl; + dumpJumpTable(os, t, ri); os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 37017ca0a..81852f097 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -51,16 +51,33 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. + ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. + ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from an internal_report. + ROSE_INSTR_SOM_ZERO, //!< Set SOM to zero. ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. - ROSE_INSTR_REPORT, //!< Fire an ordinary report. + ROSE_INSTR_DEDUPE, //!< Run deduplication for report. + ROSE_INSTR_DEDUPE_SOM, //!< Run deduplication for SOM report. ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). - ROSE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time. ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. - ROSE_INSTR_REPORT_SOM, //!< Manipulate SOM and report. - ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. + ROSE_INSTR_REPORT_SOM_AWARE, //!< Manipulate SOM from SOM-aware source. + + /** \brief Fire a report. */ + ROSE_INSTR_REPORT, + + /** \brief Fire an exhaustible report. */ + ROSE_INSTR_REPORT_EXHAUST, + + /** \brief Fire a SOM report. */ + ROSE_INSTR_REPORT_SOM, + + /** \brief Fire an exhaustible SOM report. */ + ROSE_INSTR_REPORT_SOM_EXHAUST, + + ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set. + ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length. ROSE_INSTR_SET_STATE, //!< Switch a state index on. ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups. @@ -106,8 +123,8 @@ struct ROSE_STRUCT_CHECK_ONLY_EOD { struct ROSE_STRUCT_CHECK_BOUNDS { u8 code; //!< From enum RoseInstructionCode. - u32 min_bound; //!< Min distance from zero. - u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF). + u64a min_bound; //!< Min distance from zero. + u64a max_bound; //!< Max distance from zero. u32 fail_jump; //!< Jump forward this many bytes on failure. }; @@ -138,6 +155,10 @@ struct ROSE_STRUCT_PUSH_DELAYED { u32 index; // Delay literal index (relative to first delay lit). 
}; +struct ROSE_STRUCT_CATCH_UP { + u8 code; //!< From enum RoseInstructionCode. +}; + struct ROSE_STRUCT_SOM_ADJUST { u8 code; //!< From enum RoseInstructionCode. u32 distance; //!< Distance to EOM. @@ -149,6 +170,15 @@ struct ROSE_STRUCT_SOM_LEFTFIX { u32 lag; //!< Lag of leftfix for this case. }; +struct ROSE_STRUCT_SOM_FROM_REPORT { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; //!< EXTERNAL_CALLBACK_SOM_* report to use. +}; + +struct ROSE_STRUCT_SOM_ZERO { + u8 code; //!< From enum RoseInstructionCode. +}; + struct ROSE_STRUCT_TRIGGER_INFIX { u8 code; //!< From enum RoseInstructionCode. u8 cancel; //!< Cancels previous top event. @@ -162,17 +192,19 @@ struct ROSE_STRUCT_TRIGGER_SUFFIX { u32 event; //!< Queue event, from MQE_*. }; -struct ROSE_STRUCT_REPORT { +struct ROSE_STRUCT_DEDUPE { u8 code; //!< From enum RoseInstructionCode. ReportID report; + u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_STRUCT_REPORT_CHAIN { +struct ROSE_STRUCT_DEDUPE_SOM { u8 code; //!< From enum RoseInstructionCode. ReportID report; + u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_STRUCT_REPORT_EOD { +struct ROSE_STRUCT_REPORT_CHAIN { u8 code; //!< From enum RoseInstructionCode. ReportID report; }; @@ -182,16 +214,49 @@ struct ROSE_STRUCT_REPORT_SOM_INT { ReportID report; }; +struct ROSE_STRUCT_REPORT_SOM_AWARE { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; +}; + +struct ROSE_STRUCT_REPORT { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; +}; + +struct ROSE_STRUCT_REPORT_EXHAUST { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; +}; + struct ROSE_STRUCT_REPORT_SOM { u8 code; //!< From enum RoseInstructionCode. ReportID report; }; -struct ROSE_STRUCT_REPORT_SOM_KNOWN { +struct ROSE_STRUCT_REPORT_SOM_EXHAUST { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; +}; + +struct ROSE_STRUCT_REPORT_SOM_EXT { u8 code; //!< From enum RoseInstructionCode. ReportID report; }; +struct ROSE_STRUCT_CHECK_EXHAUSTED { + u8 code; //!< From enum RoseInstructionCode. + u32 ekey; //!< Exhaustion key to check. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MIN_LENGTH { + u8 code; //!< From enum RoseInstructionCode. + s32 end_adj; //!< Offset adjustment to add to EOM first. + u64a min_length; //!< Minimum distance from SOM to EOM. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_SET_STATE { u8 code; //!< From enum RoseInstructionCode. u32 index; //!< State index in multibit. diff --git a/src/runtime.c b/src/runtime.c index d51db18b3..e38434fdc 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -47,6 +47,7 @@ #include "rose/rose.h" #include "rose/runtime.h" #include "database.h" +#include "report.h" #include "scratch.h" #include "som/som_runtime.h" #include "som/som_stream.h" @@ -56,8 +57,6 @@ #include "util/fatbit.h" #include "util/multibit.h" -#define DEDUPE_MATCHES - static really_inline void prefetch_data(const char *data, unsigned length) { __builtin_prefetch(data); @@ -170,306 +169,6 @@ void setBroken(char *state, u8 broken) { ts->broken = broken; } -static really_inline -int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, - char is_simple, char do_som) { - assert(id != MO_INVALID_IDX); // Should never get an invalid ID. 
- assert(scratch); - assert(scratch->magic == SCRATCH_MAGIC); - - struct core_info *ci = &scratch->core_info; - const struct RoseEngine *rose = ci->rose; - DEBUG_PRINTF("internal report %u\n", id); - const struct internal_report *ri = getInternalReport(rose, id); - - assert(isExternalReport(ri)); /* only external reports should reach here */ - - s32 offset_adj = ri->offsetAdjust; - UNUSED u32 dkey = ri->dkey; - u64a to_offset = offset; - u64a from_offset = 0; - UNUSED u32 dkeyCount = rose->dkeyCount; - - u32 flags = 0; -#ifndef RELEASE_BUILD - if (offset_adj) { - // alert testing tools that we've got adjusted matches - flags |= HS_MATCH_FLAG_ADJUSTED; - } -#endif - - DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch, - offset_adj); - - if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */ - DEBUG_PRINTF("pre broken - halting\n"); - return MO_HALT_MATCHING; - } - - if (!is_simple && ri->hasBounds) { - assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); - assert(ri->minOffset <= ri->maxOffset); - if (offset < ri->minOffset || offset > ri->maxOffset) { - DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", - offset, ri->minOffset, ri->maxOffset); - return ROSE_CONTINUE_MATCHING_NO_EXHAUST; - } - } - - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { - DEBUG_PRINTF("ate exhausted match\n"); - return MO_CONTINUE_MATCHING; - } - - if (ri->type == EXTERNAL_CALLBACK) { - from_offset = 0; - } else if (do_som) { - from_offset = handleSomExternal(scratch, ri, to_offset); - } - - to_offset += offset_adj; - assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - - if (do_som && ri->minLength) { - if (from_offset != HS_OFFSET_PAST_HORIZON && - (to_offset - from_offset < ri->minLength)) { - return ROSE_CONTINUE_MATCHING_NO_EXHAUST; - } - if (ri->quashSom) { - from_offset = 0; - } - } - - DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ri->onmatch, ci->userContext); - - int halt = 0; - - if (do_som || dkey != MO_INVALID_IDX) { - if (offset != scratch->deduper.current_report_offset) { - assert(scratch->deduper.current_report_offset == ~0ULL || - scratch->deduper.current_report_offset < offset); - if (offset == scratch->deduper.current_report_offset + 1) { - fatbit_clear(scratch->deduper.log[offset % 2]); - } else { - fatbit_clear(scratch->deduper.log[0]); - fatbit_clear(scratch->deduper.log[1]); - } - - DEBUG_PRINTF("adj dedupe offset %hhd\n", do_som); - if (do_som) { - halt = flushStoredSomMatches(scratch, offset); - if (halt) { - goto exit; - } - } - scratch->deduper.current_report_offset = offset; - } - } - -#ifdef DEDUPE_MATCHES - if (dkey != MO_INVALID_IDX) { - if (ri->type == EXTERNAL_CALLBACK || ri->quashSom) { - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(offset_adj == 0 || offset_adj == -1); - if (fatbit_set(scratch->deduper.log[to_offset % 2], dkeyCount, - dkey)) { - /* we have already raised this report at this offset, squash dupe - * match. 
*/ - DEBUG_PRINTF("dedupe\n"); - goto exit; - } - } else if (do_som) { - /* SOM external event */ - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(offset_adj == 0 || offset_adj == -1); - u64a *starts = scratch->deduper.som_start_log[to_offset % 2]; - if (fatbit_set(scratch->deduper.som_log[to_offset % 2], dkeyCount, - dkey)) { - starts[dkey] = MIN(starts[dkey], from_offset); - } else { - starts[dkey] = from_offset; - } - - if (offset_adj) { - scratch->deduper.som_log_dirty |= 1; - } else { - scratch->deduper.som_log_dirty |= 2; - } - - goto exit; - } - } -#endif - - halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, - flags, ci->userContext); -#ifdef DEDUPE_MATCHES -exit: -#endif - if (halt) { - DEBUG_PRINTF("callback requested to terminate matches\n"); - - setBroken(ci->state, BROKEN_FROM_USER); - ci->broken = BROKEN_FROM_USER; - - return MO_HALT_MATCHING; - } - - if (!is_simple && ri->ekey != END_EXHAUST) { - markAsMatched(ci->exhaustionVector, ri->ekey); - return MO_CONTINUE_MATCHING; - } else { - return ROSE_CONTINUE_MATCHING_NO_EXHAUST; - } -} - -static really_inline -int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, - struct hs_scratch *scratch, char is_simple) { - assert(id != MO_INVALID_IDX); // Should never get an invalid ID. - assert(scratch); - assert(scratch->magic == SCRATCH_MAGIC); - - u32 flags = 0; - - struct core_info *ci = &scratch->core_info; - const struct RoseEngine *rose = ci->rose; - const struct internal_report *ri = getInternalReport(rose, id); - - /* internal events should be handled by rose directly */ - assert(ri->type == EXTERNAL_CALLBACK); - - DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch, - ri->offsetAdjust); - - if (unlikely(can_stop_matching(scratch))) { - DEBUG_PRINTF("pre broken - halting\n"); - return MO_HALT_MATCHING; - } - - if (!is_simple && ri->hasBounds) { - assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); - if (to_offset < ri->minOffset || to_offset > ri->maxOffset) { - DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", - to_offset, ri->minOffset, ri->maxOffset); - return MO_CONTINUE_MATCHING; - } - } - - int halt = 0; - - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { - DEBUG_PRINTF("ate exhausted match\n"); - goto do_return; - } - -#ifdef DEDUPE_MATCHES - u64a offset = to_offset; -#endif - - to_offset += ri->offsetAdjust; - assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - - if (!is_simple && ri->minLength) { - if (from_offset != HS_OFFSET_PAST_HORIZON && - (to_offset - from_offset < ri->minLength)) { - return MO_CONTINUE_MATCHING; - } - if (ri->quashSom) { - from_offset = 0; - } - } - - DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ri->onmatch, ci->userContext); - -#ifndef RELEASE_BUILD - if (ri->offsetAdjust != 0) { - // alert testing tools that we've got adjusted matches - flags |= HS_MATCH_FLAG_ADJUSTED; - } -#endif - -#ifdef DEDUPE_MATCHES - u32 dkeyCount = rose->dkeyCount; - - if (offset != scratch->deduper.current_report_offset) { - - assert(scratch->deduper.current_report_offset == ~0ULL - || scratch->deduper.current_report_offset < offset); - if (offset == scratch->deduper.current_report_offset + 1) { - fatbit_clear(scratch->deduper.log[offset % 2]); - } else { - fatbit_clear(scratch->deduper.log[0]); - fatbit_clear(scratch->deduper.log[1]); - } - - 
halt = flushStoredSomMatches(scratch, offset); - if (halt) { - goto do_return; - } - - scratch->deduper.current_report_offset = offset; - } - - u32 dkey = ri->dkey; - if (dkey != MO_INVALID_IDX) { - if (ri->quashSom) { - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1); - if (fatbit_set(scratch->deduper.log[to_offset % 2], dkeyCount, - dkey)) { - /* we have already raised this report at this offset, squash - * dupe match. */ - DEBUG_PRINTF("dedupe\n"); - goto do_return; - } - } else { - /* SOM external event */ - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1); - u64a *starts = scratch->deduper.som_start_log[to_offset % 2]; - if (fatbit_set(scratch->deduper.som_log[to_offset % 2], dkeyCount, - dkey)) { - starts[dkey] = MIN(starts[dkey], from_offset); - } else { - starts[dkey] = from_offset; - } - - if (ri->offsetAdjust) { - scratch->deduper.som_log_dirty |= 1; - } else { - scratch->deduper.som_log_dirty |= 2; - } - - goto do_return; - } - } -#endif - - halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, - flags, ci->userContext); - - if (!is_simple) { - markAsMatched(ci->exhaustionVector, ri->ekey); - } - -do_return: - if (halt) { - DEBUG_PRINTF("callback requested to terminate matches\n"); - - setBroken(ci->state, BROKEN_FROM_USER); - ci->broken = BROKEN_FROM_USER; - - return MO_HALT_MATCHING; - } - - return MO_CONTINUE_MATCHING; -} - static really_inline hwlmcb_rv_t multiDirectAdaptor(u64a real_end, ReportID direct_id, void *context, struct core_info *ci, char is_simple, @@ -1055,8 +754,7 @@ hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags, static really_inline void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { const struct RoseEngine *rose = id->rose; - char *state = getMultiState(id); - u8 broken = getBroken(state); + u8 broken = scratch->core_info.broken; if (broken) { DEBUG_PRINTF("stream already broken\n"); @@ -1076,8 +774,7 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { static never_inline void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) { const struct RoseEngine *t = id->rose; - char *state = getMultiState(id); - u8 broken = getBroken(state); + u8 broken = scratch->core_info.broken; if (broken) { DEBUG_PRINTF("stream already broken\n"); @@ -1372,9 +1069,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, if (!id->offset && rose->boundary.reportZeroOffset) { DEBUG_PRINTF("zero reports\n"); processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch); - broken = getBroken(state); + broken = scratch->core_info.broken; if (unlikely(broken)) { DEBUG_PRINTF("stream is broken, halting scan\n"); + setBroken(state, broken); if (broken == BROKEN_FROM_USER) { return HS_SCAN_TERMINATED; } else { @@ -1400,7 +1098,6 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, if (rose->hasSom && !told_to_stop_matching(scratch)) { int halt = flushStoredSomMatches(scratch, ~0ULL); if (halt) { - setBroken(state, BROKEN_FROM_USER); scratch->core_info.broken = BROKEN_FROM_USER; } } @@ -1413,6 +1110,7 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, storeSomToStream(scratch, id->offset); } } else if (told_to_stop_matching(scratch)) { + setBroken(state, BROKEN_FROM_USER); return HS_SCAN_TERMINATED; } else { /* exhausted */ setBroken(state, BROKEN_EXHAUSTED); diff --git a/src/som/som_runtime.c 
b/src/som/som_runtime.c index 23f2b2827..418fcbab0 100644 --- a/src/som/som_runtime.c +++ b/src/som/som_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -487,6 +487,7 @@ int clearSomLog(struct hs_scratch *scratch, u64a offset, struct fatbit *log, int halt = ci->userCallback(onmatch, from_offset, offset, flags, ci->userContext); if (halt) { + ci->broken = BROKEN_FROM_USER; return 1; } } diff --git a/src/util/report.h b/src/util/report.h index 0e5bccf81..c4f3bd8c0 100644 --- a/src/util/report.h +++ b/src/util/report.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -184,6 +184,11 @@ bool isExternalReport(const Report &r) { return true; } +static inline +bool isExternalSomReport(const Report &r) { + return r.type != EXTERNAL_CALLBACK && isExternalReport(r); +} + static inline bool operator<(const Report &a, const Report &b) { ORDER_CHECK(type); From 28f379d738be56b055c7deecf495bf2feec7d532 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 1 Feb 2016 11:07:07 +1100 Subject: [PATCH 061/218] Rose: remove alignment req for anchored DFA state --- src/nfa/mcclellan.c | 22 ++++++++++++---------- src/rose/rose_build_anchored.cpp | 11 ++--------- src/rose/rose_build_bytecode.cpp | 5 ----- src/rose/stream.c | 2 +- 4 files changed, 15 insertions(+), 25 deletions(-) diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index ef670a930..314e88e75 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -1020,34 +1020,36 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); - if (top) { - *(u8 *)state = m->start_anchored; - } + + u8 s = top ? m->start_anchored : *(u8 *)state; if (m->flags & MCCLELLAN_FLAG_SINGLE) { - mcclellanExec8_i(m, (u8 *)state, buf + start_off, len - start_off, + mcclellanExec8_i(m, &s, buf + start_off, len - start_off, start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { - mcclellanExec8_i(m, (u8 *)state, buf + start_off, len - start_off, + mcclellanExec8_i(m, &s, buf + start_off, len - start_off, start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } + + *(u8 *)state = s; } void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); - if (top) { - *(u16 *)state = m->start_anchored; - } + + u16 s = top ? 
m->start_anchored : unaligned_load_u16(state); if (m->flags & MCCLELLAN_FLAG_SINGLE) { - mcclellanExec16_i(m, (u16 *)state, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, buf + start_off, len - start_off, start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { - mcclellanExec16_i(m, (u16 *)state, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, buf + start_off, len - start_off, start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } + + unaligned_store_u16(state, s); } char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 96393ba17..57faa46c8 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -228,7 +228,7 @@ u32 anchoredStateSize(const anchored_matcher_info &atable) { } const NFA *nfa = (const NFA *)((const char *)curr + sizeof(*curr)); - return curr->state_offset + nfa->scratchStateSize; + return curr->state_offset + nfa->streamStateSize; } bool anchoredIsMulti(const anchored_matcher_info &atable) { @@ -849,15 +849,8 @@ buildAnchoredAutomataMatcher(RoseBuildImpl &build, size_t *asize) { ami->next_offset = verify_u32(curr - prev_curr); } - // State must be aligned. - u32 align_req = state_alignment(*nfa); - assert(align_req <= 2); // only DFAs. - while (state_offset % align_req) { - state_offset++; - } - ami->state_offset = state_offset; - state_offset += nfa->scratchStateSize; + state_offset += nfa->streamStateSize; ami->anchoredMinDistance = start_offset[i]; } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c7c0891a8..275f61d0f 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -582,11 +582,6 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, so->leftfixLagTable = curr_offset; curr_offset += laggedRoseCount; - // Anchored state is McClellan full state, and needs to be 2-byte aligned. - // We potentially waste a byte here. - if (curr_offset % 2) { - curr_offset++; - } so->anchorState = curr_offset; curr_offset += anchorStateSize; diff --git a/src/rose/stream.c b/src/rose/stream.c index 71984e92f..476c4f7ce 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -74,7 +74,7 @@ void runAnchoredTableStream(const struct RoseEngine *t, const void *atable, goto next_nfa; } } else { - if (!*(u16 *)state) { + if (!unaligned_load_u16(state)) { goto next_nfa; } } From 9e9bb6a9602204398ee7fa38b4ae5fca2e1170d0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 2 Feb 2016 09:42:00 +1100 Subject: [PATCH 062/218] Rose: pack global state bits into one u8 Eliminate the RoseRuntimeState structure in favour of a single status byte that is stored in scratch and copied to/from stream state. 
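A minimal sketch of the resulting layout, using the STATUS_* names that appear
in the diff below; the bit values and the two helper functions are
illustrative assumptions, not the patch's literal code:

    #include <stdint.h>

    typedef uint8_t u8;

    /* One status byte replaces RoseRuntimeState's flags/broken pair.
     * These bit values are assumptions for illustration only. */
    #define STATUS_TERMINATED  (1U << 0) /* user callback asked us to stop */
    #define STATUS_EXHAUSTED   (1U << 1) /* every exhaustion key has been set */
    #define STATUS_DELAY_DIRTY (1U << 2) /* delay literal matched in history */

    /* Copied from stream state into scratch when a scan begins... */
    u8 load_status(const char *stream_state) {
        return (u8)stream_state[0]; /* the status byte leads the stream state */
    }

    /* ...updated in scratch while scanning, then written back at scan end. */
    void store_status(char *stream_state, u8 status) {
        stream_state[0] = (char)status;
    }

The layout matters: fillStateOffsets below reserves sizeof(u8) at the very
start of stream state for this byte, so the hot-path "should we stop?" checks
reduce to a single byte test.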
--- src/report.h | 8 +-- src/rose/block.c | 4 +- src/rose/eod.c | 3 +- src/rose/init.c | 3 - src/rose/match.h | 11 ++-- src/rose/program_runtime.h | 4 +- src/rose/rose_build_bytecode.cpp | 6 +- src/rose/rose_dump.cpp | 1 - src/rose/rose_internal.h | 10 +--- src/rose/runtime.h | 10 +--- src/rose/stream.c | 29 +++++----- src/runtime.c | 99 +++++++++++++------------------- src/scratch.h | 23 ++++---- src/som/som_runtime.c | 2 +- 14 files changed, 84 insertions(+), 129 deletions(-) diff --git a/src/report.h b/src/report.h index 2fff3b9a4..b5d9af036 100644 --- a/src/report.h +++ b/src/report.h @@ -290,7 +290,7 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, exit: if (halt) { DEBUG_PRINTF("callback requested to terminate matches\n"); - ci->broken = BROKEN_FROM_USER; + ci->status |= STATUS_TERMINATED; return MO_HALT_MATCHING; } @@ -354,7 +354,7 @@ int roseDeliverReport(u64a offset, ReportID id, struct hs_scratch *scratch, to_offset, flags, ci->userContext); if (halt) { DEBUG_PRINTF("callback requested to terminate matches\n"); - ci->broken = BROKEN_FROM_USER; + ci->status |= STATUS_TERMINATED; return MO_HALT_MATCHING; } @@ -455,7 +455,7 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, exit: if (halt) { DEBUG_PRINTF("callback requested to terminate matches\n"); - ci->broken = BROKEN_FROM_USER; + ci->status |= STATUS_TERMINATED; return MO_HALT_MATCHING; } @@ -515,7 +515,7 @@ int roseDeliverSomReport(u64a from_offset, u64a to_offset, ReportID id, if (halt) { DEBUG_PRINTF("callback requested to terminate matches\n"); - ci->broken = BROKEN_FROM_USER; + ci->status |= STATUS_TERMINATED; return MO_HALT_MATCHING; } diff --git a/src/rose/block.c b/src/rose/block.c index 3d4a008db..98dee627c 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -258,9 +258,7 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, } exit:; - u8 dummy_delay_mask = 0; - if (cleanUpDelayed(length, 0, tctxt, &dummy_delay_mask) - == HWLM_TERMINATE_MATCHING) { + if (cleanUpDelayed(length, 0, scratch) == HWLM_TERMINATE_MATCHING) { return; } diff --git a/src/rose/eod.c b/src/rose/eod.c index ade45727b..91e59521f 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -98,8 +98,7 @@ hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset, hwlmExec(etable, eod_data, eod_len, adj, roseCallback, tctxt, tctxt->groups); // We may need to fire delayed matches - u8 dummy_delay_mask = 0; - return cleanUpDelayed(0, offset, tctxt, &dummy_delay_mask); + return cleanUpDelayed(0, offset, scratch); } static rose_inline diff --git a/src/rose/init.c b/src/rose/init.c index 1ec520c33..511eafe4d 100644 --- a/src/rose/init.c +++ b/src/rose/init.c @@ -45,10 +45,7 @@ static really_inline void init_rstate(const struct RoseEngine *t, char *state) { // Set runtime state: we take our initial groups from the RoseEngine. 
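     // (With RoseRuntimeState gone, only the group mask needs setting here;
     //  the status byte now lives in scratch during scanning.)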
DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups); - struct RoseRuntimeState *rstate = getRuntimeState(state); storeGroups(t, state, t->initialGroups); - rstate->flags = 0; - rstate->broken = NOT_BROKEN; } static really_inline diff --git a/src/rose/match.h b/src/rose/match.h index 2b6dfb5d1..7d00e2acc 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -237,12 +237,13 @@ hwlmcb_rv_t flushQueuedLiterals(struct RoseContext *tctxt, u64a end) { } static really_inline -hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, struct RoseContext *tctxt, - u8 *status) { - if (can_stop_matching(tctxtToScratch(tctxt))) { +hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, + struct hs_scratch *scratch) { + if (can_stop_matching(scratch)) { return HWLM_TERMINATE_MATCHING; } + struct RoseContext *tctxt = &scratch->tctxt; if (flushQueuedLiterals(tctxt, length + offset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; @@ -250,9 +251,9 @@ hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, struct RoseContext *tctxt if (tctxt->filledDelayedSlots) { DEBUG_PRINTF("dirty\n"); - *status |= DELAY_FLOAT_DIRTY; + scratch->core_info.status |= STATUS_DELAY_DIRTY; } else { - *status &= ~DELAY_FLOAT_DIRTY; + scratch->core_info.status &= ~STATUS_DELAY_DIRTY; } tctxt->filledDelayedSlots = 0; diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 1e1356e14..766b18a8f 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -211,9 +211,7 @@ hwlmcb_rv_t roseHaltIfExhausted(const struct RoseEngine *t, struct hs_scratch *scratch) { struct core_info *ci = &scratch->core_info; if (isAllExhausted(t, ci->exhaustionVector)) { - if (!ci->broken) { - ci->broken = BROKEN_EXHAUSTED; - } + ci->status |= STATUS_EXHAUSTED; scratch->tctxt.groups = 0; DEBUG_PRINTF("all exhausted, termination requested\n"); return HWLM_TERMINATE_MATCHING; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 275f61d0f..c067b6a39 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -561,9 +561,9 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, RoseStateOffsets *so) { u32 curr_offset = 0; - // First, runtime state (stores per-stream state, like whether we need a + // First, runtime status (stores per-stream state, like whether we need a // delay rebuild or have been told to halt matching.) - curr_offset += sizeof(RoseRuntimeState); + curr_offset += sizeof(u8); // Role state storage. 
curr_offset += mmbit_size(rolesWithStateCount); @@ -4433,7 +4433,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { &stateOffsets); scatter_plan_raw state_scatter; - buildStateScatterPlan(sizeof(RoseRuntimeState), bc.numStates, + buildStateScatterPlan(sizeof(u8), bc.numStates, activeLeftCount, rosePrefixCount, stateOffsets, cc.streaming, activeArrayCount, outfixBeginQueue, outfixEndQueue, &state_scatter); diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index adf737264..e803b8c45 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -865,7 +865,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { t->historyRequired); fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); - fprintf(f, " - runtime state : %zu bytes\n", sizeof(RoseRuntimeState)); fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState); fprintf(f, " - active array : %u bytes\n", mmbit_size(t->activeArrayCount)); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 326887da8..0d6c96e9b 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -48,8 +48,6 @@ typedef u64a rose_group; #define MAX_DELAY (DELAY_SLOT_COUNT - 1) #define DELAY_MASK (DELAY_SLOT_COUNT - 1) -#define DELAY_FLOAT_DIRTY (1U << 7) /* delay literal matched in history */ - // Direct report stuff #define LITERAL_DR_FLAG (1U << 31) #define LITERAL_MDR_FLAG ((1U << 30) | (1U << 31)) @@ -214,7 +212,7 @@ struct NfaInfo { * * State not covered by this structure includes: * - * -# the RoseRuntimeState structure + * -# the first byte, containing the status bitmask * -# the role state multibit */ struct RoseStateOffsets { @@ -476,12 +474,6 @@ struct RoseEngine { struct scatter_full_plan state_init; }; -// Rose runtime state -struct RoseRuntimeState { - u8 flags; /* high bit true if delay rebuild needed */ - u8 broken; /* user has requested that we stop matching */ -}; - struct ALIGN_CL_DIRECTIVE anchored_matcher_info { u32 next_offset; /* relative to this, 0 for end */ u32 state_offset; /* relative to anchorState */ diff --git a/src/rose/runtime.h b/src/rose/runtime.h index a8587538d..414ad78fd 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -55,14 +55,6 @@ #define rose_inline really_inline -/** \brief Fetch runtime state ptr. */ -static really_inline -struct RoseRuntimeState *getRuntimeState(char *state) { - struct RoseRuntimeState *rs = (struct RoseRuntimeState *)(state); - assert(ISALIGNED_N(rs, 8)); - return rs; -} - static really_inline const void *getByOffset(const struct RoseEngine *t, u32 offset) { assert(offset < t->size); @@ -71,7 +63,7 @@ const void *getByOffset(const struct RoseEngine *t, u32 offset) { static really_inline void *getRoleState(char *state) { - return state + sizeof(struct RoseRuntimeState); + return state + sizeof(u8); // status flags } /** \brief Fetch the active array for suffix nfas. 
*/ diff --git a/src/rose/stream.c b/src/rose/stream.c index 476c4f7ce..9b7394896 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -393,7 +393,7 @@ void roseSaveNfaStreamState(const struct RoseEngine *t, char *state, static rose_inline void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, size_t length, - u64a offset, u8 delay_rb_status) { + u64a offset) { struct RoseContext *tctxt = &scratch->tctxt; if (roseCatchUpTo(t, state, length + scratch->core_info.buf_offset, scratch, @@ -406,8 +406,6 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, roseFlushLastByteHistory(t, state, offset + length, tctxt); tctxt->lastEndOffset = offset + length; storeGroups(t, state, tctxt->groups); - struct RoseRuntimeState *rstate = getRuntimeState(state); - rstate->flags = delay_rb_status; } static really_inline @@ -418,6 +416,8 @@ void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable, const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len; DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len); + scratch->core_info.status &= ~STATUS_DELAY_DIRTY; + hwlmExec(ftable, buf, len, 0, roseDelayRebuildCallback, scratch, scratch->tctxt.groups); assert(!can_stop_matching(scratch)); @@ -446,7 +446,6 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, } char *state = scratch->core_info.state; - struct RoseRuntimeState *rstate = getRuntimeState(state); struct RoseContext *tctxt = &scratch->tctxt; tctxt->mpv_inactive = 0; @@ -475,8 +474,6 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, streamInitSufPQ(t, state, scratch); } - u8 delay_rb_status = rstate->flags; - u32 alen = t->anchoredDistance > offset ? MIN(length + offset, t->anchoredDistance) - offset : 0; @@ -507,12 +504,13 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, size_t hlength = scratch->core_info.hlen; - char rebuild = hlength && (delay_rb_status & DELAY_FLOAT_DIRTY) - && (t->maxFloatingDelayedMatch == ROSE_BOUND_INF - || offset < t->maxFloatingDelayedMatch); + char rebuild = hlength && + (scratch->core_info.status & STATUS_DELAY_DIRTY) && + (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || + offset < t->maxFloatingDelayedMatch); DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n", - rebuild, delay_rb_status, t->maxFloatingDelayedMatch, - offset); + rebuild, scratch->core_info.status, + t->maxFloatingDelayedMatch, offset); if (!flen) { if (rebuild) { /* rebuild floating delayed match stuff */ @@ -552,17 +550,16 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, flush_delay_and_exit: DEBUG_PRINTF("flushing floating\n"); - if (cleanUpDelayed(length, offset, tctxt, &delay_rb_status) - == HWLM_TERMINATE_MATCHING) { + if (cleanUpDelayed(length, offset, scratch) == HWLM_TERMINATE_MATCHING) { return; } exit: DEBUG_PRINTF("CLEAN UP TIME\n"); if (!can_stop_matching(scratch)) { - ensureStreamNeatAndTidy(t, state, scratch, length, offset, - delay_rb_status); + ensureStreamNeatAndTidy(t, state, scratch, length, offset); } - DEBUG_PRINTF("DONE STREAMING SCAN, dirty = %hhu\n", delay_rb_status); + DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n", + scratch->core_info.status); return; } diff --git a/src/runtime.c b/src/runtime.c index e38434fdc..24ee90f08 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -119,7 +119,8 @@ static really_inline void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, char *state, 
match_event_handler onEvent, void *userCtx, const char *data, size_t length, const u8 *history, - size_t hlen, u64a offset, UNUSED unsigned int flags) { + size_t hlen, u64a offset, u8 status, + UNUSED unsigned int flags) { assert(rose); s->core_info.userContext = userCtx; s->core_info.userCallback = onEvent ? onEvent : null_onEvent; @@ -127,7 +128,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, s->core_info.state = state; /* required for chained queues + evec */ s->core_info.exhaustionVector = state + rose->stateOffsets.exhausted; - s->core_info.broken = NOT_BROKEN; + s->core_info.status = status; s->core_info.buf = (const u8 *)data; s->core_info.len = length; s->core_info.hbuf = history; @@ -140,33 +141,22 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, s->deduper.som_log_dirty = 1; /* som logs have not been cleared */ } -/** \brief Query whether this stream is broken. - * - * A broken stream is one on which scanning has stopped, either because the - * user has told us to (via the return value from a match callback) or because - * we have exhausted all reports. - * - * \return NOT_BROKEN, BROKEN_FROM_USER or BROKEN_EXHAUSTED. - */ +#define STATUS_VALID_BITS \ + (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_DELAY_DIRTY) + +/** \brief Retrieve status bitmask from stream state. */ static really_inline -u8 getBroken(const char *state) { - const struct RoseRuntimeState *ts = (const void *)state; - assert(ts->broken == NOT_BROKEN || ts->broken == BROKEN_FROM_USER - || ts->broken == BROKEN_EXHAUSTED); - return ts->broken; +u8 getStreamStatus(const char *state) { + u8 status = *(const u8 *)state; + assert((status & ~STATUS_VALID_BITS) == 0); + return status; } -/** \brief Mark this stream with the given broken flag. - * - * Possible values: NOT_BROKEN, BROKEN_FROM_USER, BROKEN_EXHAUSTED. - */ +/** \brief Store status bitmask to stream state. 
*/ static really_inline -void setBroken(char *state, u8 broken) { - DEBUG_PRINTF("set broken=%d\n", broken); - assert(broken == NOT_BROKEN || broken == BROKEN_FROM_USER - || broken == BROKEN_EXHAUSTED); - struct RoseRuntimeState *ts = (void *)state; - ts->broken = broken; +void setStreamStatus(char *state, u8 status) { + assert((status & ~STATUS_VALID_BITS) == 0); + *(u8 *)state = status; } static really_inline @@ -585,7 +575,7 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, /* populate core info in scratch */ populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data, - length, NULL, 0, 0, flags); + length, NULL, 0, 0, 0, flags); clearEvec(scratch->core_info.exhaustionVector, rose); @@ -707,6 +697,7 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose) { char *state = getMultiState(s); + setStreamStatus(state, 0); roseInitState(rose, state); clearEvec((char *)state + rose->stateOffsets.exhausted, rose); @@ -754,11 +745,9 @@ hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags, static really_inline void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { const struct RoseEngine *rose = id->rose; - u8 broken = scratch->core_info.broken; - if (broken) { + if (can_stop_matching(scratch)) { DEBUG_PRINTF("stream already broken\n"); - assert(broken == BROKEN_FROM_USER || broken == BROKEN_EXHAUSTED); return; } @@ -774,11 +763,9 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { static never_inline void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) { const struct RoseEngine *t = id->rose; - u8 broken = scratch->core_info.broken; - if (broken) { + if (can_stop_matching(scratch)) { DEBUG_PRINTF("stream already broken\n"); - assert(broken == BROKEN_FROM_USER || broken == BROKEN_EXHAUSTED); return; } @@ -817,15 +804,16 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, const struct RoseEngine *rose = id->rose; char *state = getMultiState(id); + u8 status = getStreamStatus(state); - if (getBroken(state)) { + if (status == STATUS_TERMINATED || status == STATUS_EXHAUSTED) { DEBUG_PRINTF("stream is broken, just freeing storage\n"); return; } populateCoreInfo(scratch, rose, state, onEvent, context, NULL, 0, getHistory(state, rose, id->offset), - getHistoryAmount(rose, id->offset), id->offset, 0); + getHistoryAmount(rose, id->offset), id->offset, status, 0); if (rose->somLocationCount) { loadSomFromStream(scratch, id->offset); @@ -861,8 +849,7 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, int halt = flushStoredSomMatches(scratch, ~0ULL); if (halt) { DEBUG_PRINTF("told to stop matching\n"); - scratch->core_info.broken = BROKEN_FROM_USER; - DEBUG_PRINTF("broken = %hhd\n", scratch->core_info.broken); + scratch->core_info.status |= STATUS_TERMINATED; } } } @@ -931,8 +918,7 @@ static really_inline void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { assert(stream_state); assert(scratch); - - assert(!getBroken(getMultiState(stream_state))); + assert(!can_stop_matching(scratch)); DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n", stream_state->offset, scratch->core_info.len); @@ -944,7 +930,7 @@ void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { DEBUG_PRINTF("stream exhausted\n"); - scratch->core_info.broken = BROKEN_EXHAUSTED; + scratch->core_info.status = STATUS_EXHAUSTED; } } @@ -953,9 +939,9 @@ void 
pureLiteralStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { assert(stream_state); assert(scratch); + assert(!can_stop_matching(scratch)); char *state = getMultiState(stream_state); - assert(!getBroken(state)); const struct RoseEngine *rose = stream_state->rose; const struct HWLM *ftable = getFLiteralMatcher(rose); @@ -982,7 +968,7 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { DEBUG_PRINTF("stream exhausted\n"); - scratch->core_info.broken = BROKEN_EXHAUSTED; + scratch->core_info.status |= STATUS_EXHAUSTED; } } @@ -991,6 +977,7 @@ void soleOutfixStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { assert(stream_state); assert(scratch); + assert(!can_stop_matching(scratch)); const struct RoseEngine *t = stream_state->rose; assert(t->outfixEndQueue == 1); @@ -1017,7 +1004,7 @@ void soleOutfixStreamExec(struct hs_stream *stream_state, if (nfaQueueExec(q->nfa, q, scratch->core_info.len)) { nfaQueueCompressState(nfa, q, scratch->core_info.len); } else if (!told_to_stop_matching(scratch)) { - scratch->core_info.broken = BROKEN_EXHAUSTED; + scratch->core_info.status |= STATUS_EXHAUSTED; } } @@ -1033,13 +1020,12 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, const struct RoseEngine *rose = id->rose; char *state = getMultiState(id); - u8 broken = getBroken(state); - if (broken) { + u8 status = getStreamStatus(state); + if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED)) { DEBUG_PRINTF("stream is broken, halting scan\n"); - if (broken == BROKEN_FROM_USER) { + if (status & STATUS_TERMINATED) { return HS_SCAN_TERMINATED; } else { - assert(broken == BROKEN_EXHAUSTED); return HS_SUCCESS; } } @@ -1049,14 +1035,13 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, // cases here. 
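// (Zero-length scans return immediately; stream status has already been
// checked up front via getStreamStatus().)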
if (unlikely(length == 0)) { DEBUG_PRINTF("zero length block\n"); - assert(getBroken(state) != BROKEN_FROM_USER); return HS_SUCCESS; } u32 historyAmount = getHistoryAmount(rose, id->offset); populateCoreInfo(scratch, rose, state, onEvent, context, data, length, getHistory(state, rose, id->offset), historyAmount, - id->offset, flags); + id->offset, status, flags); assert(scratch->core_info.hlen <= id->offset && scratch->core_info.hlen <= rose->historyRequired); @@ -1069,14 +1054,13 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, if (!id->offset && rose->boundary.reportZeroOffset) { DEBUG_PRINTF("zero reports\n"); processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch); - broken = scratch->core_info.broken; - if (unlikely(broken)) { + if (unlikely(can_stop_matching(scratch))) { DEBUG_PRINTF("stream is broken, halting scan\n"); - setBroken(state, broken); - if (broken == BROKEN_FROM_USER) { + setStreamStatus(state, scratch->core_info.status); + if (told_to_stop_matching(scratch)) { return HS_SCAN_TERMINATED; } else { - assert(broken == BROKEN_EXHAUSTED); + assert(scratch->core_info.status & STATUS_EXHAUSTED); return HS_SUCCESS; } } @@ -1098,22 +1082,21 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, if (rose->hasSom && !told_to_stop_matching(scratch)) { int halt = flushStoredSomMatches(scratch, ~0ULL); if (halt) { - scratch->core_info.broken = BROKEN_FROM_USER; + scratch->core_info.status |= STATUS_TERMINATED; } } + setStreamStatus(state, scratch->core_info.status); + if (likely(!can_stop_matching(scratch))) { - maintainHistoryBuffer(id->rose, getMultiState(id), data, length); + maintainHistoryBuffer(rose, state, data, length); id->offset += length; /* maintain offset */ if (rose->somLocationCount) { storeSomToStream(scratch, id->offset); } } else if (told_to_stop_matching(scratch)) { - setBroken(state, BROKEN_FROM_USER); return HS_SCAN_TERMINATED; - } else { /* exhausted */ - setBroken(state, BROKEN_EXHAUSTED); } return HS_SUCCESS; diff --git a/src/scratch.h b/src/scratch.h index 150db3f24..e082d2f89 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -70,17 +70,16 @@ struct catchup_pq { u32 qm_size; /**< current size of the priority queue */ }; +/** \brief Status flag: user requested termination. */ +#define STATUS_TERMINATED (1U << 0) -/** \brief Value indicating a stream is active (not broken). */ -#define NOT_BROKEN 0 +/** \brief Status flag: all possible matches on this stream have + * been raised (i.e. all its exhaustion keys are on.) */ +#define STATUS_EXHAUSTED (1U << 1) -/** \brief Value indicating that the user has requested that matching be - * terminated. */ -#define BROKEN_FROM_USER 1 - -/** \brief Value indicating that all possible matches on this stream have been - * raised (i.e. all its exhaustion keys are on.) */ -#define BROKEN_EXHAUSTED 2 +/** \brief Status flag: Rose requires rebuild as delay literal matched in + * history. */ +#define STATUS_DELAY_DIRTY (1U << 2) /** \brief Core information about the current scan, used everywhere. */ struct core_info { @@ -93,12 +92,12 @@ struct core_info { const struct RoseEngine *rose; char *state; /**< full stream state */ char *exhaustionVector; /**< pointer to evec for this stream */ - char broken; /**< user told us to stop, or exhausted */ const u8 *buf; /**< main scan buffer */ size_t len; /**< length of main scan buffer in bytes */ const u8 *hbuf; /**< history buffer */ size_t hlen; /**< length of history buffer in bytes. 
*/ u64a buf_offset; /**< stream offset, for the base of the buffer */ + u8 status; /**< stream status bitmask, using STATUS_ flags above */ }; /** \brief Rose state information. */ @@ -213,12 +212,12 @@ struct fatbit **getDelaySlots(struct hs_scratch *scratch) { static really_inline char told_to_stop_matching(const struct hs_scratch *scratch) { - return scratch->core_info.broken == BROKEN_FROM_USER; + return scratch->core_info.status & STATUS_TERMINATED; } static really_inline char can_stop_matching(const struct hs_scratch *scratch) { - return scratch->core_info.broken != NOT_BROKEN; + return scratch->core_info.status & (STATUS_TERMINATED | STATUS_EXHAUSTED); } #ifdef __cplusplus diff --git a/src/som/som_runtime.c b/src/som/som_runtime.c index 418fcbab0..84eeb6013 100644 --- a/src/som/som_runtime.c +++ b/src/som/som_runtime.c @@ -487,7 +487,7 @@ int clearSomLog(struct hs_scratch *scratch, u64a offset, struct fatbit *log, int halt = ci->userCallback(onmatch, from_offset, offset, flags, ci->userContext); if (halt) { - ci->broken = BROKEN_FROM_USER; + ci->status |= STATUS_TERMINATED; return 1; } } From 09bf568d954631200e2565c9ab38c321a435ded1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 4 Feb 2016 12:46:53 +1100 Subject: [PATCH 063/218] Rose: clean up use of scratch, RoseContext --- src/rose/block.c | 7 +- src/rose/catchup.c | 135 +++++++++++++++-------------- src/rose/catchup.h | 24 +++--- src/rose/eod.c | 30 ++++--- src/rose/match.c | 168 +++++++++++++++++-------------------- src/rose/match.h | 43 +++++----- src/rose/program_runtime.h | 130 ++++++++++++++-------------- src/rose/stream.c | 13 ++- 8 files changed, 267 insertions(+), 283 deletions(-) diff --git a/src/rose/block.c b/src/rose/block.c index 98dee627c..e081d3aec 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -105,7 +105,6 @@ void init_outfixes_for_block(const struct RoseEngine *t, size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf, scratch->core_info.len); if (len) { - struct RoseContext *tctxt = &scratch->tctxt; u8 *activeArray = getActiveLeafArray(t, state); const u32 activeArraySize = t->activeArrayCount; const u32 qCount = t->queueCount; @@ -114,7 +113,7 @@ void init_outfixes_for_block(const struct RoseEngine *t, fatbit_set(scratch->aqa, qCount, 0); struct mq *q = scratch->queues; - initQueue(q, 0, t, tctxt); + initQueue(q, 0, t, scratch); q->length = len; /* adjust for rev_accel */ nfaQueueInitState(nfa, q); pushQueueAt(q, 0, MQE_START, 0); @@ -258,11 +257,11 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, } exit:; - if (cleanUpDelayed(length, 0, scratch) == HWLM_TERMINATE_MATCHING) { + if (cleanUpDelayed(t, scratch, length, 0) == HWLM_TERMINATE_MATCHING) { return; } assert(!can_stop_matching(scratch)); - roseCatchUpTo(t, state, length, scratch, 0); + roseCatchUpTo(t, scratch, length, 0); } diff --git a/src/rose/catchup.c b/src/rose/catchup.c index b302fbdd7..b84ca59c7 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -41,9 +41,9 @@ typedef struct queue_match PQ_T; #include "util/pqueue.h" static really_inline -int handleReportInternally(struct hs_scratch *scratch, ReportID id, +int handleReportInternally(const struct RoseEngine *t, + struct hs_scratch *scratch, ReportID id, u64a offset) { - const struct RoseEngine *t = scratch->core_info.rose; const struct internal_report *ri = getInternalReport(t, id); if (ri->type == EXTERNAL_CALLBACK) { return 0; @@ -53,7 +53,7 @@ int handleReportInternally(struct hs_scratch *scratch, ReportID id, return 1; } if 
(ri->type == INTERNAL_ROSE_CHAIN) { - roseHandleChainMatch(t, id, offset, &scratch->tctxt, 0, 1); + roseHandleChainMatch(t, scratch, id, offset, 0, 1); return 1; } @@ -61,9 +61,9 @@ int handleReportInternally(struct hs_scratch *scratch, ReportID id, } static really_inline -int handleReportInternallyNoChain(struct hs_scratch *scratch, ReportID id, +int handleReportInternallyNoChain(const struct RoseEngine *t, + struct hs_scratch *scratch, ReportID id, u64a offset) { - const struct RoseEngine *t = scratch->core_info.rose; const struct internal_report *ri = getInternalReport(t, id); if (ri->type == EXTERNAL_CALLBACK) { return 0; @@ -100,11 +100,11 @@ void currentAnchoredMatch(const struct RoseEngine *t, } static rose_inline -void nextAnchoredMatch(const struct RoseEngine *t, struct RoseContext *tctxt, +void nextAnchoredMatch(const struct RoseEngine *t, struct hs_scratch *scratch, ReportID *reportId, u64a *end) { + struct RoseContext *tctxt = &scratch->tctxt; assert(tctxt->curr_anchored_loc != MMB_INVALID); - struct hs_scratch *scratch = tctxtToScratch(tctxt); struct fatbit **anchoredRows = getAnchoredLog(scratch); u32 region_width = t->anchoredMatches; @@ -141,8 +141,8 @@ void nextAnchoredMatch(const struct RoseEngine *t, struct RoseContext *tctxt, } static really_inline -void deactivateQueue(u8 *aa, u32 qi, struct hs_scratch *scratch) { - const struct RoseEngine *t = scratch->core_info.rose; +void deactivateQueue(const struct RoseEngine *t, u8 *aa, u32 qi, + struct hs_scratch *scratch) { u32 aaCount = t->activeArrayCount; u32 qCount = t->queueCount; @@ -160,7 +160,7 @@ void ensureQueueActive(const struct RoseEngine *t, u32 qi, u32 qCount, struct mq *q, struct hs_scratch *scratch) { if (!fatbit_set(scratch->aqa, qCount, qi)) { DEBUG_PRINTF("initing %u\n", qi); - initQueue(q, qi, t, &scratch->tctxt); + initQueue(q, qi, t, scratch); loadStreamState(q->nfa, q, 0); pushQueueAt(q, 0, MQE_START, 0); } @@ -211,7 +211,8 @@ s64a pq_top_loc(struct catchup_pq *pq) { /* requires that we are the top item on the pq */ static really_inline -hwlmcb_rv_t runExistingNfaToNextMatch(u32 qi, struct mq *q, s64a loc, +hwlmcb_rv_t runExistingNfaToNextMatch(const struct RoseEngine *t, u32 qi, + struct mq *q, s64a loc, struct hs_scratch *scratch, u8 *aa, char report_curr) { assert(pq_top(scratch->catchup_pq.qm)->queue == qi); @@ -242,7 +243,7 @@ hwlmcb_rv_t runExistingNfaToNextMatch(u32 qi, struct mq *q, s64a loc, return HWLM_TERMINATE_MATCHING; } - deactivateQueue(aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); } else if (q->cur == q->end) { DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); q->cur = q->end = 0; @@ -267,7 +268,8 @@ hwlmcb_rv_t runExistingNfaToNextMatch(u32 qi, struct mq *q, s64a loc, } static really_inline -hwlmcb_rv_t runNewNfaToNextMatch(u32 qi, struct mq *q, s64a loc, +hwlmcb_rv_t runNewNfaToNextMatch(const struct RoseEngine *t, u32 qi, + struct mq *q, s64a loc, struct hs_scratch *scratch, u8 *aa, s64a report_ok_loc) { assert(!q->report_current); @@ -300,7 +302,7 @@ hwlmcb_rv_t runNewNfaToNextMatch(u32 qi, struct mq *q, s64a loc, return HWLM_TERMINATE_MATCHING; } - deactivateQueue(aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); } else if (q->cur == q->end) { DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); q->cur = q->end = 0; @@ -327,6 +329,7 @@ static UNUSED int roseNfaFinalBlastAdaptor(u64a offset, ReportID id, void *context) { struct RoseContext *tctxt = context; struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct RoseEngine *t = scratch->core_info.rose; 
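/* The recurring shape of this cleanup: adaptor callbacks receive only the
 * RoseContext, so they recover scratch once via tctxtToScratch() and pull
 * the engine from core_info, letting helpers like handleReportInternally()
 * and deactivateQueue() take t and scratch explicitly rather than
 * re-deriving them at each call site. */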
DEBUG_PRINTF("called\n"); @@ -334,7 +337,7 @@ int roseNfaFinalBlastAdaptor(u64a offset, ReportID id, void *context) { offset, id); updateLastMatchOffset(tctxt, offset); - if (handleReportInternallyNoChain(scratch, id, offset)) { + if (handleReportInternallyNoChain(t, scratch, id, offset)) { return MO_CONTINUE_MATCHING; } @@ -345,7 +348,7 @@ int roseNfaFinalBlastAdaptor(u64a offset, ReportID id, void *context) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(scratch->core_info.rose, 0, + return !roseSuffixIsExhausted(t, 0, scratch->core_info.exhaustionVector); } } @@ -356,6 +359,7 @@ int roseNfaFinalBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { struct RoseContext *tctxt = context; struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("called\n"); /* chained nfas are run under the control of the anchored catchup */ @@ -371,7 +375,7 @@ int roseNfaFinalBlastAdaptorNoInternal(u64a offset, ReportID id, return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(scratch->core_info.rose, 0, + return !roseSuffixIsExhausted(t, 0, scratch->core_info.exhaustionVector); } } @@ -395,7 +399,7 @@ hwlmcb_rv_t add_to_queue(const struct RoseEngine *t, struct mq *queues, if (roseSuffixInfoIsExhausted(t, info, scratch->core_info.exhaustionVector)) { - deactivateQueue(aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); return HWLM_CONTINUE_MATCHING; } @@ -408,7 +412,7 @@ hwlmcb_rv_t add_to_queue(const struct RoseEngine *t, struct mq *queues, ensureEnd(q, qi, loc); - return runNewNfaToNextMatch(qi, q, loc, scratch, aa, report_ok_loc); + return runNewNfaToNextMatch(t, qi, q, loc, scratch, aa, report_ok_loc); } static really_inline @@ -429,8 +433,9 @@ s64a findSecondPlace(struct catchup_pq *pq, s64a loc_limit) { } } -hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, char *state, s64a loc, +hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, struct hs_scratch *scratch) { + char *state = scratch->core_info.state; struct mq *queues = scratch->queues; u8 *aa = getActiveLeafArray(t, state); UNUSED u32 aaCount = t->activeArrayCount; @@ -453,7 +458,7 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, char *state, s64a loc, if (roseSuffixInfoIsExhausted(t, info, scratch->core_info.exhaustionVector)) { - deactivateQueue(aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); goto done; } @@ -487,7 +492,7 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, char *state, s64a loc, if (!next_pos_match_loc) { /* 0 means dead */ DEBUG_PRINTF("mpv is pining for the fjords\n"); if (can_stop_matching(scratch)) { - deactivateQueue(aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); return HWLM_TERMINATE_MATCHING; } @@ -527,9 +532,8 @@ int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { DEBUG_PRINTF("called\n"); if (ri->type != INTERNAL_ROSE_CHAIN) { /* INTERNAL_ROSE_CHAIN are not visible externally */ - if (roseCatchUpMPV(t, scratch->core_info.state, - offset - scratch->core_info.buf_offset, scratch) - == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpMPV(t, offset - scratch->core_info.buf_offset, + scratch) == HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("done\n"); return MO_HALT_MATCHING; } @@ -538,7 +542,7 @@ int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", offset, id); - if 
(handleReportInternally(scratch, id, offset)) { + if (handleReportInternally(t, scratch, id, offset)) { return MO_CONTINUE_MATCHING; } @@ -563,9 +567,8 @@ int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("called\n"); - if (roseCatchUpMPV(t, scratch->core_info.state, - offset - scratch->core_info.buf_offset, - scratch) == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpMPV(t, offset - scratch->core_info.buf_offset, scratch) == + HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("done\n"); return MO_HALT_MATCHING; } @@ -590,13 +593,14 @@ static UNUSED int roseNfaBlastAdaptorNoChain(u64a offset, ReportID id, void *context) { struct RoseContext *tctxt = context; struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", offset, id); updateLastMatchOffset(tctxt, offset); - if (handleReportInternallyNoChain(scratch, id, offset)) { + if (handleReportInternallyNoChain(t, scratch, id, offset)) { return MO_CONTINUE_MATCHING; } @@ -607,7 +611,7 @@ int roseNfaBlastAdaptorNoChain(u64a offset, ReportID id, void *context) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(scratch->core_info.rose, tctxt->curr_qi, + return !roseSuffixIsExhausted(t, tctxt->curr_qi, scratch->core_info.exhaustionVector); } } @@ -617,6 +621,7 @@ int roseNfaBlastAdaptorNoInternalNoChain(u64a offset, ReportID id, void *context) { struct RoseContext *tctxt = context; struct hs_scratch *scratch = tctxtToScratch(tctxt); + const struct RoseEngine *t = scratch->core_info.rose; /* chained nfas are run under the control of the anchored catchup */ @@ -631,7 +636,7 @@ int roseNfaBlastAdaptorNoInternalNoChain(u64a offset, ReportID id, return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(scratch->core_info.rose, tctxt->curr_qi, + return !roseSuffixIsExhausted(t, tctxt->curr_qi, scratch->core_info.exhaustionVector); } } @@ -644,9 +649,8 @@ int roseNfaBlastSomAdaptor(u64a from_offset, u64a offset, ReportID id, const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("called\n"); - if (roseCatchUpMPV(t, scratch->core_info.state, - offset - scratch->core_info.buf_offset, - scratch) == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpMPV(t, offset - scratch->core_info.buf_offset, scratch) == + HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("roseCatchUpNfas done\n"); return MO_HALT_MATCHING; } @@ -675,12 +679,12 @@ int roseNfaAdaptor(u64a offset, ReportID id, void *context) { updateLastMatchOffset(tctxt, offset); struct hs_scratch *scratch = tctxtToScratch(tctxt); - if (handleReportInternally(scratch, id, offset)) { + const struct RoseEngine *t = scratch->core_info.rose; + if (handleReportInternally(t, scratch, id, offset)) { return MO_CONTINUE_MATCHING; } - int cb_rv = tctxt->cb(offset, id, scratch); - return cb_rv; + return tctxt->cb(offset, id, scratch); } int roseNfaAdaptorNoInternal(u64a offset, ReportID id, void *context) { @@ -748,7 +752,7 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, if (roseSuffixInfoIsExhausted(t, info, scratch->core_info.exhaustionVector)) { - deactivateQueue(aa, a_qi, scratch); + deactivateQueue(t, aa, a_qi, scratch); return HWLM_CONTINUE_MATCHING; } @@ -776,7 +780,7 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, return 
HWLM_TERMINATE_MATCHING; } - deactivateQueue(aa, a_qi, scratch); + deactivateQueue(t, aa, a_qi, scratch); } else if (q->cur == q->end) { DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", a_qi, final_loc); @@ -792,8 +796,8 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, assert(second_place_loc < final_loc); assert(q_cur_loc(q) >= second_place_loc); - if (runNewNfaToNextMatch(a_qi, q, final_loc, scratch, aa, report_ok_loc) - == HWLM_TERMINATE_MATCHING) { + if (runNewNfaToNextMatch(t, a_qi, q, final_loc, scratch, aa, + report_ok_loc) == HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("roseCatchUpNfas done\n"); return HWLM_TERMINATE_MATCHING; } @@ -833,7 +837,7 @@ void streamInitSufPQ(const struct RoseEngine *t, char *state, pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl); } else if (!alive) { - deactivateQueue(aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); } else { assert(q->cur == q->end); /* TODO: can this be simplified? the nfa will never produce any @@ -880,7 +884,7 @@ void blockInitSufPQ(const struct RoseEngine *t, char *state, mmbit_set(aa, aaCount, qi); fatbit_set(aqa, qCount, qi); struct mq *q = queues + qi; - initQueue(q, qi, t, &scratch->tctxt); + initQueue(q, qi, t, scratch); q->length = len; /* adjust for rev_accel */ nfaQueueInitState(nfa, q); pushQueueAt(q, 0, MQE_START, 0); @@ -897,7 +901,7 @@ void blockInitSufPQ(const struct RoseEngine *t, char *state, pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl); } else if (!alive) { - deactivateQueue(aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); } else { assert(q->cur == q->end); /* TODO: can this be simplified? the nfa will never produce any @@ -952,7 +956,7 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, s64a report_ok_loc = tctxt->minNonMpvMatchOffset + 1 - scratch->core_info.buf_offset; - hwlmcb_rv_t rv = roseCatchUpMPV(t, state, report_ok_loc, scratch); + hwlmcb_rv_t rv = roseCatchUpMPV(t, report_ok_loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } @@ -989,7 +993,7 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, } static never_inline -hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, char *state, s64a loc, +hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc, s64a final_loc, struct hs_scratch *scratch) { struct RoseContext *tctxt = &scratch->tctxt; assert(t->activeArrayCount); @@ -999,6 +1003,7 @@ hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, char *state, s64a loc, DEBUG_PRINTF("min non mpv match offset %llu\n", scratch->tctxt.minNonMpvMatchOffset); + char *state = scratch->core_info.state; struct mq *queues = scratch->queues; u8 *aa = getActiveLeafArray(t, state); @@ -1019,7 +1024,7 @@ hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, char *state, s64a loc, } /* catch up char matches to this point */ - if (roseCatchUpMPV(t, state, match_loc, scratch) + if (roseCatchUpMPV(t, match_loc, scratch) == HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("roseCatchUpNfas done\n"); return HWLM_TERMINATE_MATCHING; @@ -1046,7 +1051,7 @@ hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, char *state, s64a loc, DEBUG_PRINTF("second place %lld loc %lld\n", second_place_loc, loc); if (second_place_loc == q_cur_loc(q)) { - if (runExistingNfaToNextMatch(qi, q, q_final_loc, scratch, aa, 1) + if (runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 1) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -1061,7 +1066,7 @@ hwlmcb_rv_t roseCatchUpNfas(const 
struct RoseEngine *t, char *state, s64a loc, return HWLM_TERMINATE_MATCHING; } - deactivateQueue(aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); pq_pop_nice(&scratch->catchup_pq); } else if (q->cur == q->end) { DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc); @@ -1075,7 +1080,7 @@ hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, char *state, s64a loc, } else { DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n", qi, q->cur, q->end, q->items[q->cur].location, loc); - runExistingNfaToNextMatch(qi, q, q_final_loc, scratch, aa, 0); + runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 0); } } exit:; @@ -1085,16 +1090,16 @@ exit:; } static really_inline -hwlmcb_rv_t roseCatchUpNfasAndMpv(const struct RoseEngine *t, char *state, +hwlmcb_rv_t roseCatchUpNfasAndMpv(const struct RoseEngine *t, s64a loc, s64a final_loc, struct hs_scratch *scratch) { - hwlmcb_rv_t rv = roseCatchUpNfas(t, state, loc, final_loc, scratch); + hwlmcb_rv_t rv = roseCatchUpNfas(t, loc, final_loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } - return roseCatchUpMPV(t, state, loc, scratch); + return roseCatchUpMPV(t, loc, scratch); } @@ -1126,7 +1131,7 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, } /* buildSufPQ may have caught only part of the pq upto anchored_end */ - rv = roseCatchUpNfas(t, scratch->core_info.state, + rv = roseCatchUpNfas(t, anchored_end - scratch->core_info.buf_offset, loc, scratch); @@ -1137,7 +1142,7 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, while (anchored_report != MO_INVALID_IDX && anchored_end <= current_offset) { if (anchored_end != tctxt->minMatchOffset) { - rv = roseCatchUpNfasAndMpv(t, scratch->core_info.state, + rv = roseCatchUpNfasAndMpv(t, anchored_end - scratch->core_info.buf_offset, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { @@ -1149,7 +1154,7 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, assert(anchored_end == tctxt->minMatchOffset); updateLastMatchOffset(tctxt, anchored_end); - if (handleReportInternally(scratch, anchored_report, anchored_end)) { + if (handleReportInternally(t, scratch, anchored_report, anchored_end)) { goto next; } @@ -1159,7 +1164,7 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, return HWLM_TERMINATE_MATCHING; } next: - nextAnchoredMatch(t, tctxt, &anchored_report, &anchored_end); + nextAnchoredMatch(t, scratch, &anchored_report, &anchored_end); DEBUG_PRINTF("catch up %u %llu\n", anchored_report, anchored_end); } @@ -1169,7 +1174,7 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, return HWLM_CONTINUE_MATCHING; } - rv = roseCatchUpNfas(t, scratch->core_info.state, loc, loc, scratch); + rv = roseCatchUpNfas(t, loc, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; @@ -1180,7 +1185,7 @@ hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, if (do_full_mpv) { /* finish off any outstanding chained matches */ - rv = roseCatchUpMPV(t, scratch->core_info.state, loc, scratch); + rv = roseCatchUpMPV(t, loc, scratch); } DEBUG_PRINTF("catchup all done %llu\n", current_offset); @@ -1212,12 +1217,12 @@ hwlmcb_rv_t roseCatchUpSufAndChains(s64a loc, struct hs_scratch *scratch) { return rv; } - rv = roseCatchUpNfas(t, state, loc, loc, scratch); + rv = roseCatchUpNfas(t, loc, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } - rv = roseCatchUpMPV(t, state, loc, scratch); + rv = roseCatchUpMPV(t, loc, scratch); assert(rv != HWLM_CONTINUE_MATCHING || 
scratch->catchup_pq.qm_size <= t->outfixEndQueue); return rv; @@ -1237,7 +1242,7 @@ hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch) { return rv; } - rv = roseCatchUpNfas(t, state, loc, loc, scratch); + rv = roseCatchUpNfas(t, loc, loc, scratch); assert(rv != HWLM_CONTINUE_MATCHING || scratch->catchup_pq.qm_size <= t->outfixEndQueue); @@ -1264,7 +1269,7 @@ hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch) { updateLastMatchOffset(tctxt, anchored_end); /* as we require that there are no leaf nfas - there must be no nfa */ - if (handleReportInternallyNoChain(scratch, anchored_report, + if (handleReportInternallyNoChain(t, scratch, anchored_report, anchored_end)) { goto next; } @@ -1275,7 +1280,7 @@ hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch) { return HWLM_TERMINATE_MATCHING; } next: - nextAnchoredMatch(t, tctxt, &anchored_report, &anchored_end); + nextAnchoredMatch(t, scratch, &anchored_report, &anchored_end); DEBUG_PRINTF("catch up %u %llu\n", anchored_report, anchored_end); } diff --git a/src/rose/catchup.h b/src/rose/catchup.h index bbbaa987d..65fd12c9d 100644 --- a/src/rose/catchup.h +++ b/src/rose/catchup.h @@ -72,8 +72,7 @@ hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch); /* will only catch mpv upto last reported external match */ hwlmcb_rv_t roseCatchUpAnchoredAndSuf(s64a loc, struct hs_scratch *scratch); - -hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, char *state, s64a loc, +hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, struct hs_scratch *scratch); void blockInitSufPQ(const struct RoseEngine *t, char *state, @@ -82,8 +81,8 @@ void streamInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch); static really_inline -hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, char *state, - s64a loc, struct hs_scratch *scratch) { +hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc, + struct hs_scratch *scratch) { u64a cur_offset = loc + scratch->core_info.buf_offset; assert(cur_offset >= scratch->tctxt.minMatchOffset); @@ -115,7 +114,7 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, char *state, assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */ - u8 *aa = getActiveLeafArray(t, state); + u8 *aa = getActiveLeafArray(t, scratch->core_info.state); u32 aaCount = t->activeArrayCount; if (!mmbit_isset(aa, aaCount, 0)){ @@ -126,7 +125,7 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, char *state, * they may have events pushed on during this process which may be before * the catch up point */ - return roseCatchUpMPV_i(t, state, loc, scratch); + return roseCatchUpMPV_i(t, loc, scratch); } static really_inline @@ -140,8 +139,9 @@ u64a currentAnchoredEnd(const struct RoseEngine *t, struct RoseContext *tctxt) { /* catches up nfas, anchored matches and the mpv */ static rose_inline -hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, char *state, u64a end, - struct hs_scratch *scratch, char in_anchored) { +hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end, + char in_anchored) { /* no need to catch up if we are at the same offset as last time */ if (end <= scratch->tctxt.minMatchOffset) { /* we must already be up to date */ @@ -149,11 +149,12 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, char *state, u64a end, return HWLM_CONTINUE_MATCHING; } + char *state = scratch->core_info.state; s64a loc = end - scratch->core_info.buf_offset; if (end <= 
scratch->tctxt.minNonMpvMatchOffset) { /* only need to catch up the mpv */ - return roseCatchUpMPV(t, state, loc, scratch); + return roseCatchUpMPV(t, loc, scratch); } assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset); @@ -188,8 +189,8 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, char *state, u64a end, * and suf/outfixes. The MPV will be run only to intersperse matches in * the output match stream if external matches are raised. */ static rose_inline -hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, char *state, - u64a end, struct hs_scratch *scratch, +hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end, char in_anchored) { /* no need to catch up if we are at the same offset as last time */ if (end <= scratch->tctxt.minNonMpvMatchOffset) { @@ -213,6 +214,7 @@ hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, char *state, /* sadly, this branch rarely gets taken as the mpv itself is usually * alive. */ + char *state = scratch->core_info.state; if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { scratch->tctxt.minNonMpvMatchOffset = end; return HWLM_CONTINUE_MATCHING; diff --git a/src/rose/eod.c b/src/rose/eod.c index 91e59521f..c6f9e09e6 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -98,7 +98,7 @@ hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset, hwlmExec(etable, eod_data, eod_len, adj, roseCallback, tctxt, tctxt->groups); // We may need to fire delayed matches - return cleanUpDelayed(0, offset, scratch); + return cleanUpDelayed(t, scratch, 0, offset); } static rose_inline @@ -111,8 +111,8 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset, DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset); const size_t match_len = 0; - if (roseRunProgram(t, t->eodIterProgramOffset, offset, match_len, - &(scratch->tctxt), 0) == HWLM_TERMINATE_MATCHING) { + if (roseRunProgram(t, scratch, t->eodIterProgramOffset, offset, match_len, + 0) == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } @@ -203,12 +203,10 @@ int roseCheckNfaEod(const struct RoseEngine *t, char *state, } static rose_inline -void cleanupAfterEodMatcher(const struct RoseEngine *t, char *state, - u64a offset, struct hs_scratch *scratch) { - struct RoseContext *tctxt = &scratch->tctxt; - +void cleanupAfterEodMatcher(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { // Flush history to make sure it's consistent. - roseFlushLastByteHistory(t, state, offset, tctxt); + roseFlushLastByteHistory(t, scratch, offset); } static rose_inline @@ -265,8 +263,8 @@ int roseRunEodProgram(const struct RoseEngine *t, u64a offset, assert(!scratch->tctxt.filledDelayedSlots); const size_t match_len = 0; - if (roseRunProgram(t, t->eodProgramOffset, offset, match_len, - &scratch->tctxt, 0) == HWLM_TERMINATE_MATCHING) { + if (roseRunProgram(t, scratch, t->eodProgramOffset, offset, match_len, 0) == + HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } @@ -313,7 +311,7 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, return; } - cleanupAfterEodMatcher(t, state, offset, scratch); + cleanupAfterEodMatcher(t, offset, scratch); // Fire any new EOD reports. 
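// (roseEodRunIterator() drives the stored EOD iterator program through
// roseRunProgram(), which now takes scratch rather than a RoseContext;
// see its hunk above.)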
if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) { @@ -350,10 +348,10 @@ void roseEodExec(const struct RoseEngine *t, u64a offset, } static rose_inline -void prepForEod(const struct RoseEngine *t, char *state, size_t length, - struct RoseContext *tctxt) { - roseFlushLastByteHistory(t, state, length, tctxt); - tctxt->lastEndOffset = length; +void prepForEod(const struct RoseEngine *t, struct hs_scratch *scratch, + size_t length) { + roseFlushLastByteHistory(t, scratch, length); + scratch->tctxt.lastEndOffset = length; } void roseBlockEodExec(const struct RoseEngine *t, u64a offset, @@ -367,7 +365,7 @@ void roseBlockEodExec(const struct RoseEngine *t, u64a offset, char *state = scratch->core_info.state; // Ensure that history is correct before we look for EOD matches - prepForEod(t, state, scratch->core_info.len, &scratch->tctxt); + prepForEod(t, scratch, scratch->core_info.len); roseEodExec_i(t, state, offset, scratch, 0); } diff --git a/src/rose/match.c b/src/rose/match.c index a91c03659..faa583032 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -102,7 +102,7 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, if (programOffset) { const size_t match_len = end - start + 1; UNUSED hwlmcb_rv_t rv = - roseRunProgram(t, programOffset, real_end, match_len, tctx, 0); + roseRunProgram(t, scratch, programOffset, real_end, match_len, 0); assert(rv != HWLM_TERMINATE_MATCHING); } @@ -121,10 +121,8 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, } static rose_inline -void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId, - u64a end) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - const struct RoseEngine *t = scratch->core_info.rose; +void recordAnchoredMatch(const struct RoseEngine *t, struct hs_scratch *scratch, + ReportID reportId, u64a end) { struct fatbit **anchoredRows = getAnchoredLog(scratch); DEBUG_PRINTF("record %u @ %llu\n", reportId, end); @@ -145,11 +143,10 @@ void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId, } static rose_inline -void recordAnchoredLiteralMatch(struct RoseContext *tctxt, u32 literal_id, +void recordAnchoredLiteralMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 literal_id, u64a end) { assert(end); - struct hs_scratch *scratch = tctxtToScratch(tctxt); - const struct RoseEngine *t = scratch->core_info.rose; struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); DEBUG_PRINTF("record %u @ %llu\n", literal_id, end); @@ -167,10 +164,9 @@ void recordAnchoredLiteralMatch(struct RoseContext *tctxt, u32 literal_id, fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); } -hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, - u64a end, struct RoseContext *tctxt, - char in_anchored, char in_catchup) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); +hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, ReportID r, + u64a end, char in_anchored, char in_catchup) { struct core_info *ci = &scratch->core_info; u8 *aa = getActiveLeafArray(t, scratch->core_info.state); @@ -197,7 +193,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, tctxt); + initQueue(q, qi, t, scratch); nfaQueueInitState(q->nfa, q); pushQueueAt(q, 0, MQE_START, loc); fatbit_set(activeQueues, qCount, qi); @@ -206,7 +202,7 @@ hwlmcb_rv_t roseHandleChainMatch(const 
struct RoseEngine *t, ReportID r, /* nfa only needs one top; we can go home now */ return HWLM_CONTINUE_MATCHING; } else if (!fatbit_set(activeQueues, qCount, qi)) { - initQueue(q, qi, t, tctxt); + initQueue(q, qi, t, scratch); loadStreamState(q->nfa, q, 0); pushQueueAt(q, 0, MQE_START, 0); } else if (isQueueFull(q)) { @@ -238,7 +234,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, pushQueueNoMerge(q, MQE_END, loc); char alive = nfaQueueExec(q->nfa, q, loc); if (alive) { - tctxt->mpv_inactive = 0; + scratch->tctxt.mpv_inactive = 0; q->cur = q->end = 0; pushQueueAt(q, 0, MQE_START, loc); } else { @@ -248,8 +244,8 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, } DEBUG_PRINTF("added mpv event at %lld\n", loc); - tctxt->next_mpv_offset = 0; /* the top event may result in matches earlier - * than expected */ + scratch->tctxt.next_mpv_offset = 0; /* the top event may result in matches + * earlier than expected */ return HWLM_CONTINUE_MATCHING; } @@ -278,12 +274,10 @@ hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, ReportID id, u64a end, /* handles catchup, som, cb, etc */ static really_inline -hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, char *state, - struct RoseContext *tctxt, ReportID id, +hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, + struct hs_scratch *scratch, ReportID id, u64a offset, char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - if (roseCatchUpTo(t, state, offset, scratch, in_anchored) == + if (roseCatchUpTo(t, scratch, offset, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -294,7 +288,7 @@ hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, char *state, roseHandleSom(t, scratch, id, offset); return HWLM_CONTINUE_MATCHING; } else if (ri->type == INTERNAL_ROSE_CHAIN) { - return roseCatchUpAndHandleChainMatch(t, state, id, offset, tctxt, + return roseCatchUpAndHandleChainMatch(t, scratch, id, offset, in_anchored); } } @@ -304,25 +298,23 @@ hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, char *state, static really_inline hwlmcb_rv_t roseHandleAnchoredDirectReport(const struct RoseEngine *t, - char *state, - struct RoseContext *tctxt, + struct hs_scratch *scratch, u64a real_end, ReportID report) { DEBUG_PRINTF("direct report %u, real_end=%llu\n", report, real_end); if (real_end > t->maxSafeAnchoredDROffset) { DEBUG_PRINTF("match in overlapped anchored region --> stash\n"); - recordAnchoredMatch(tctxt, report, real_end); + recordAnchoredMatch(t, scratch, report, real_end); return HWLM_CONTINUE_MATCHING; } - return roseHandleReport(t, state, tctxt, report, real_end, - 1 /* in anchored */); + return roseHandleReport(t, scratch, report, real_end, 1 /* in anchored */); } int roseAnchoredCallback(u64a end, u32 id, void *ctx) { struct RoseContext *tctxt = ctx; - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - char *state = ci->state; + struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct core_info *ci = &scratch->core_info; const struct RoseEngine *t = ci->rose; u64a real_end = ci->buf_offset + end; // index after last byte @@ -330,7 +322,7 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { DEBUG_PRINTF("MATCH id=%u offsets=[???,%llu]\n", id, real_end); DEBUG_PRINTF("STATE groups=0x%016llx\n", tctxt->groups); - if (can_stop_matching(tctxtToScratch(tctxt))) { + if (can_stop_matching(scratch)) { DEBUG_PRINTF("received a match when we're already dead!\n"); return MO_HALT_MATCHING; } @@ -351,8 +343,7 @@ int 
roseAnchoredCallback(u64a end, u32 id, void *ctx) { (const ReportID *)((const char *)t + t->multidirectOffset) + mdr_offset; for (; *report != MO_INVALID_IDX; report++) { - rv = roseHandleAnchoredDirectReport(t, state, tctxt, real_end, - *report); + rv = roseHandleAnchoredDirectReport(t, scratch, real_end, *report); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } @@ -361,7 +352,7 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { } else if (isLiteralDR(id)) { // Single direct report. ReportID report = literalToReport(id); - rv = roseHandleAnchoredDirectReport(t, state, tctxt, real_end, report); + rv = roseHandleAnchoredDirectReport(t, scratch, real_end, report); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } @@ -379,14 +370,14 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { DEBUG_PRINTF("literal id=%u\n", id); if (real_end <= t->floatingMinLiteralMatchOffset) { - roseFlushLastByteHistory(t, state, real_end, tctxt); + roseFlushLastByteHistory(t, scratch, real_end); tctxt->lastEndOffset = real_end; } const size_t match_len = 0; - if (roseRunProgram(t, programOffset, real_end, match_len, tctxt, 1) == + if (roseRunProgram(t, scratch, programOffset, real_end, match_len, 1) == HWLM_TERMINATE_MATCHING) { - assert(can_stop_matching(tctxtToScratch(tctxt))); + assert(can_stop_matching(scratch)); DEBUG_PRINTF("caller requested termination\n"); return MO_HALT_MATCHING; } @@ -394,7 +385,7 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); if (real_end > t->floatingMinLiteralMatchOffset) { - recordAnchoredLiteralMatch(tctxt, id, real_end); + recordAnchoredLiteralMatch(t, scratch, id, real_end); } return MO_CONTINUE_MATCHING; @@ -403,14 +394,10 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { // Rose match-processing workhorse /* assumes not in_anchored */ static really_inline -hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, - size_t match_len, u32 id, - struct RoseContext *tctxt, char in_delay_play, +hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end, + size_t match_len, u32 id, char in_delay_play, char in_anch_playback) { - /* assert(!tctxt->in_anchored); */ - struct hs_scratch *scratch = tctxtToScratch(tctxt); - char *state = scratch->core_info.state; - DEBUG_PRINTF("id=%u\n", id); if (!in_anch_playback && !in_delay_play) { @@ -422,7 +409,7 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, mdr_offset; for (; *report != MO_INVALID_IDX; report++) { DEBUG_PRINTF("handle multi-direct report %u\n", *report); - hwlmcb_rv_t rv = roseHandleReport(t, state, tctxt, *report, end, + hwlmcb_rv_t rv = roseHandleReport(t, scratch, *report, end, 0 /* in anchored */); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; @@ -433,40 +420,42 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, // Single direct report. 
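// (A direct-report literal maps straight to a single ReportID via
// literalToReport(); the multi-direct case above instead walks a list of
// reports terminated by MO_INVALID_IDX.)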
ReportID report = literalToReport(id); DEBUG_PRINTF("handle direct report %u\n", report); - return roseHandleReport(t, state, tctxt, report, end, + return roseHandleReport(t, scratch, report, end, 0 /* in anchored */); } } assert(id < t->literalCount); const u32 *programs = getByOffset(t, t->litProgramOffset); - return roseRunProgram(t, programs[id], end, match_len, tctxt, 0); + return roseRunProgram(t, scratch, programs[id], end, match_len, 0); } static never_inline -hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, u64a end, - u32 id, struct RoseContext *tctxt) { +hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end, + u32 id) { size_t match_len = 0; - return roseProcessMatch_i(t, end, match_len, id, tctxt, 1, 0); + return roseProcessMatch_i(t, scratch, end, match_len, id, 1, 0); } static never_inline hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t, - u64a end, u32 id, - struct RoseContext *tctxt) { + struct hs_scratch *scratch, + u64a end, u32 id) { size_t match_len = 0; - return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 1); + return roseProcessMatch_i(t, scratch, end, match_len, id, 0, 1); } static really_inline -hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end, - size_t match_len, u32 id, - struct RoseContext *tctxt) { - return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 0); +hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end, + size_t match_len, u32 id) { + return roseProcessMatch_i(t, scratch, end, match_len, id, 0, 0); } static rose_inline -hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt, +hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, + struct hs_scratch *scratch, struct fatbit **delaySlots, u32 vicIndex, u64a offset) { /* assert(!tctxt->in_anchored); */ @@ -479,8 +468,8 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt, return HWLM_CONTINUE_MATCHING; } - struct hs_scratch *scratch = tctxtToScratch(tctxt); - roseFlushLastByteHistory(t, scratch->core_info.state, offset, tctxt); + struct RoseContext *tctxt = &scratch->tctxt; + roseFlushLastByteHistory(t, scratch, offset); tctxt->lastEndOffset = offset; for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID); @@ -490,7 +479,8 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt, UNUSED rose_group old_groups = tctxt->groups; DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", literal_id, offset); - hwlmcb_rv_t rv = roseProcessDelayedMatch(t, offset, literal_id, tctxt); + hwlmcb_rv_t rv = + roseProcessDelayedMatch(t, scratch, offset, literal_id); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* delayed literals can't safely set groups. 
@@ -509,8 +499,9 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt, static really_inline hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, - struct RoseContext *tctxt, u32 curr_loc) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch, + u32 curr_loc) { + struct RoseContext *tctxt = &scratch->tctxt; struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1]; u32 region_width = t->anchored_count; @@ -523,8 +514,8 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, rose_group old_groups = tctxt->groups; DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", literal_id, curr_loc); - hwlmcb_rv_t rv = roseProcessDelayedAnchoredMatch(t, curr_loc, - literal_id, tctxt); + hwlmcb_rv_t rv = + roseProcessDelayedAnchoredMatch(t, scratch, curr_loc, literal_id); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* anchored literals can't safely set groups. @@ -546,23 +537,22 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, } static really_inline -u32 anchored_it_begin(struct RoseContext *tctxt) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); +u32 anchored_it_begin(struct hs_scratch *scratch) { + struct RoseContext *tctxt = &scratch->tctxt; if (tctxt->lastEndOffset >= scratch->anchored_literal_region_len) { return MMB_INVALID; } u32 begin = tctxt->lastEndOffset; begin--; - return bf64_iterate(tctxtToScratch(tctxt)->al_log_sum, begin); + return bf64_iterate(scratch->al_log_sum, begin); } static really_inline hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t, - struct RoseContext *tctxt, + struct hs_scratch *scratch, u32 *anchored_it_param, u64a to_off) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - char *state = scratch->core_info.state; + struct RoseContext *tctxt = &scratch->tctxt; u32 anchored_it = *anchored_it_param; /* catch up any remaining anchored matches */ for (; anchored_it != MMB_INVALID && anchored_it < to_off; @@ -570,10 +560,10 @@ hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t, assert(anchored_it < scratch->anchored_literal_region_len); DEBUG_PRINTF("loc_it = %u\n", anchored_it); u32 curr_off = anchored_it + 1; - roseFlushLastByteHistory(t, state, curr_off, tctxt); + roseFlushLastByteHistory(t, scratch, curr_off); tctxt->lastEndOffset = curr_off; - if (flushAnchoredLiteralAtLoc(t, tctxt, curr_off) + if (flushAnchoredLiteralAtLoc(t, scratch, curr_off) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -584,22 +574,20 @@ hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t, } static really_inline -hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct RoseContext *tctxt, +hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct hs_scratch *scratch, u32 *anchored_it, u64a lastEnd, u64a victimDelaySlots, struct fatbit **delaySlots) { - /* assert (!tctxt->in_anchored); */ - while (victimDelaySlots) { u32 vic = findAndClearLSB_64(&victimDelaySlots); DEBUG_PRINTF("vic = %u\n", vic); u64a vicOffset = vic + (lastEnd & ~(u64a)DELAY_MASK); - if (flushAnchoredLiterals(t, tctxt, anchored_it, vicOffset) + if (flushAnchoredLiterals(t, scratch, anchored_it, vicOffset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - if (playDelaySlot(t, tctxt, delaySlots, vic % DELAY_SLOT_COUNT, + if (playDelaySlot(t, scratch, delaySlots, vic % DELAY_SLOT_COUNT, vicOffset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -609,18 +597,16 @@ hwlmcb_rv_t playVictims(const struct 
RoseEngine *t, struct RoseContext *tctxt, } /* call flushQueuedLiterals instead */ -hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - const struct RoseEngine *t = scratch->core_info.rose; - - /* assert(!tctxt->in_anchored); */ +hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a currEnd) { + struct RoseContext *tctxt = &scratch->tctxt; u64a lastEnd = tctxt->delayLastEndOffset; DEBUG_PRINTF("flushing backed up matches @%llu up from %llu\n", currEnd, lastEnd); assert(currEnd != lastEnd); /* checked in main entry point */ - u32 anchored_it = anchored_it_begin(tctxt); + u32 anchored_it = anchored_it_begin(scratch); if (!tctxt->filledDelayedSlots) { DEBUG_PRINTF("no delayed, no flush\n"); @@ -628,7 +614,7 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) { } { - struct fatbit **delaySlots = getDelaySlots(tctxtToScratch(tctxt)); + struct fatbit **delaySlots = getDelaySlots(scratch); u32 lastIndex = lastEnd & DELAY_MASK; u32 currIndex = currEnd & DELAY_MASK; @@ -681,14 +667,14 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) { second_half, victimDelaySlots, lastIndex); } - if (playVictims(t, tctxt, &anchored_it, lastEnd, victimDelaySlots, + if (playVictims(t, scratch, &anchored_it, lastEnd, victimDelaySlots, delaySlots) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } anchored_leftovers:; - hwlmcb_rv_t rv = flushAnchoredLiterals(t, tctxt, &anchored_it, currEnd); + hwlmcb_rv_t rv = flushAnchoredLiterals(t, scratch, &anchored_it, currEnd); tctxt->delayLastEndOffset = currEnd; return rv; } @@ -715,11 +701,11 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { return HWLM_TERMINATE_MATCHING; } - hwlmcb_rv_t rv = flushQueuedLiterals(tctx, real_end); + hwlmcb_rv_t rv = flushQueuedLiterals(t, scratch, real_end); /* flushDelayed may have advanced tctx->lastEndOffset */ if (real_end >= t->floatingMinLiteralMatchOffset) { - roseFlushLastByteHistory(t, scratch->core_info.state, real_end, tctx); + roseFlushLastByteHistory(t, scratch, real_end); tctx->lastEndOffset = real_end; } @@ -728,7 +714,7 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { } size_t match_len = end - start + 1; - rv = roseProcessMainMatch(t, real_end, match_len, id, tctx); + rv = roseProcessMainMatch(t, scratch, real_end, match_len, id); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); diff --git a/src/rose/match.h b/src/rose/match.h index 7d00e2acc..0629d8d7b 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -77,14 +77,13 @@ void resetAnchoredLog(const struct RoseEngine *t, struct hs_scratch *scratch) { tctxt->curr_row_offset); } -hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, - u64a end, struct RoseContext *tctxt, - char in_anchored, char in_catchup); +hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, ReportID r, + u64a end, char in_anchored, char in_catchup); static really_inline void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, - struct RoseContext *tctxt) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch) { const struct NfaInfo *info = getNfaInfoByQueue(t, qi); assert(scratch->fullState); q->nfa = getNfaByInfo(t, info); @@ -103,7 +102,7 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, q->cb = roseNfaAdaptor; } q->som_cb = roseNfaSomAdaptor; 
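    /* Convention adopted throughout this series: helpers take the enclosing
     * hs_scratch and derive the per-scan RoseContext from it, i.e.
     *
     *     struct RoseContext *tctxt = &scratch->tctxt;
     *
     * rather than taking tctxt and recovering scratch via tctxtToScratch().
     * The queue context below follows suit. */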
- q->context = tctxt; + q->context = &scratch->tctxt; q->report_current = 0; DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, " @@ -114,8 +113,7 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, static really_inline void initRoseQueue(const struct RoseEngine *t, u32 qi, const struct LeftNfaInfo *left, - struct RoseContext *tctxt) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch) { struct mq *q = scratch->queues + qi; const struct NfaInfo *info = getNfaInfoByQueue(t, qi); q->nfa = getNfaByInfo(t, info); @@ -219,36 +217,41 @@ char isZombie(const struct RoseEngine *t, const char *state, return leftfixDelay[di] == OWB_ZOMBIE_ALWAYS_YES; } -hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a end); +hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end); static really_inline -hwlmcb_rv_t flushQueuedLiterals(struct RoseContext *tctxt, u64a end) { +hwlmcb_rv_t flushQueuedLiterals(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end) { + struct RoseContext *tctxt = &scratch->tctxt; + if (tctxt->delayLastEndOffset == end) { DEBUG_PRINTF("no progress, no flush\n"); return HWLM_CONTINUE_MATCHING; } - if (!tctxt->filledDelayedSlots && !tctxtToScratch(tctxt)->al_log_sum) { + if (!tctxt->filledDelayedSlots && !scratch->al_log_sum) { tctxt->delayLastEndOffset = end; return HWLM_CONTINUE_MATCHING; } - return flushQueuedLiterals_i(tctxt, end); + return flushQueuedLiterals_i(t, scratch, end); } static really_inline -hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, - struct hs_scratch *scratch) { +hwlmcb_rv_t cleanUpDelayed(const struct RoseEngine *t, + struct hs_scratch *scratch, size_t length, + u64a offset) { if (can_stop_matching(scratch)) { return HWLM_TERMINATE_MATCHING; } - struct RoseContext *tctxt = &scratch->tctxt; - if (flushQueuedLiterals(tctxt, length + offset) + if (flushQueuedLiterals(t, scratch, length + offset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } + struct RoseContext *tctxt = &scratch->tctxt; if (tctxt->filledDelayedSlots) { DEBUG_PRINTF("dirty\n"); scratch->core_info.status |= STATUS_DELAY_DIRTY; @@ -263,13 +266,13 @@ hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, } static rose_inline -void roseFlushLastByteHistory(const struct RoseEngine *t, char *state, - u64a currEnd, struct RoseContext *tctxt) { +void roseFlushLastByteHistory(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a currEnd) { if (!t->lastByteHistoryIterOffset) { return; } - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct RoseContext *tctxt = &scratch->tctxt; struct core_info *ci = &scratch->core_info; /* currEnd is last byte of string + 1 */ @@ -286,7 +289,7 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, char *state, assert(ISALIGNED(it)); const u32 numStates = t->rolesWithStateCount; - void *role_state = getRoleState(state); + void *role_state = getRoleState(scratch->core_info.state); struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 766b18a8f..7702e737b 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -114,21 +114,22 @@ int roseCheckBenefits(const struct core_info *ci, u64a end, u32 mask_rewind, } static rose_inline -void rosePushDelayedMatch(const struct RoseEngine *t, u32 delay, - u32 delay_index, u64a offset, - struct RoseContext *tctxt) { +void rosePushDelayedMatch(const struct RoseEngine 
*t, + struct hs_scratch *scratch, u32 delay, + u32 delay_index, u64a offset) { assert(delay); const u32 src_slot_index = delay; u32 slot_index = (src_slot_index + offset) & DELAY_MASK; + struct RoseContext *tctxt = &scratch->tctxt; if (offset + src_slot_index <= tctxt->delayLastEndOffset) { DEBUG_PRINTF("skip too late\n"); return; } const u32 delay_count = t->delay_count; - struct fatbit **delaySlots = getDelaySlots(tctxtToScratch(tctxt)); + struct fatbit **delaySlots = getDelaySlots(scratch); struct fatbit *slot = delaySlots[slot_index]; DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index); @@ -248,16 +249,15 @@ hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, if (loc + scratch->core_info.buf_offset <= tctxt->minNonMpvMatchOffset) { DEBUG_PRINTF("flushing chained\n"); - if (roseCatchUpMPV(t, scratch->core_info.state, loc, - scratch) == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpMPV(t, loc, scratch) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } goto done_queue_empty; } } - if (roseCatchUpTo(t, scratch->core_info.state, - loc + scratch->core_info.buf_offset, scratch, + if (roseCatchUpTo(t, scratch, loc + scratch->core_info.buf_offset, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -266,14 +266,13 @@ hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, assert(is_mpv); DEBUG_PRINTF("flushing chained\n"); tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (roseCatchUpMPV(t, scratch->core_info.state, loc, scratch) - == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpMPV(t, loc, scratch) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } done_queue_empty: if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, tctxt); + initQueue(q, qi, t, scratch); nfaQueueInitState(q->nfa, q); pushQueueAt(q, 0, MQE_START, loc); fatbit_set(activeQueues, qCount, qi); @@ -292,26 +291,24 @@ hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, } static rose_inline -hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, - u32 qi, u32 top, u64a som, - u64a end, struct RoseContext *tctxt, - char in_anchored) { +hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, u32 top, + u64a som, u64a end, char in_anchored) { DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); - struct hs_scratch *scratch = tctxtToScratch(tctxt); - u8 *aa = getActiveLeafArray(t, scratch->core_info.state); + struct core_info *ci = &scratch->core_info; + u8 *aa = getActiveLeafArray(t, ci->state); const u32 aaCount = t->activeArrayCount; const u32 qCount = t->queueCount; struct mq *q = &scratch->queues[qi]; const struct NfaInfo *info = getNfaInfoByQueue(t, qi); const struct NFA *nfa = getNfaByInfo(t, info); - struct core_info *ci = &scratch->core_info; s64a loc = (s64a)end - ci->buf_offset; assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, tctxt); + initQueue(q, qi, t, scratch); nfaQueueInitState(nfa, q); pushQueueAt(q, 0, MQE_START, loc); fatbit_set(scratch->aqa, qCount, qi); @@ -320,7 +317,7 @@ hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, /* nfa only needs one top; we can go home now */ return HWLM_CONTINUE_MATCHING; } else if (!fatbit_set(scratch->aqa, qCount, qi)) { - initQueue(q, qi, t, tctxt); + initQueue(q, qi, t, scratch); loadStreamState(nfa, q, 0); pushQueueAt(q, 0, MQE_START, 0); } else if (isQueueFull(q)) { @@ -359,10 +356,8 @@ hwlmcb_rv_t roseHandleSuffixTrigger(const struct 
RoseEngine *t, } static really_inline -char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, - ReportID leftfixReport, u64a end, - struct RoseContext *tctxt) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); +char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { struct core_info *ci = &scratch->core_info; u32 ri = queueToLeftIndex(t, qi); @@ -400,7 +395,7 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, if (!fatbit_set(scratch->aqa, qCount, qi)) { DEBUG_PRINTF("initing q %u\n", qi); - initRoseQueue(t, qi, left, tctxt); + initRoseQueue(t, qi, left, scratch); if (ci->buf_offset) { // there have been writes before us! s32 sp; if (left->transient) { @@ -470,7 +465,7 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); mmbit_unset(activeLeftArray, arCount, ri); assert(!mmbit_isset(activeLeftArray, arCount, ri)); - tctxt->groups &= left->squash_mask; + scratch->tctxt.groups &= left->squash_mask; return 0; } @@ -490,9 +485,9 @@ char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, } static rose_inline -void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, - u32 topEvent, u8 cancel, struct RoseContext *tctxt) { - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; +void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch, + u64a start, u64a end, u32 qi, u32 topEvent, u8 cancel) { + struct core_info *ci = &scratch->core_info; s64a loc = (s64a)end - ci->buf_offset; u32 ri = queueToLeftIndex(t, qi); @@ -503,11 +498,10 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); - struct mq *q = tctxtToScratch(tctxt)->queues + qi; + struct mq *q = scratch->queues + qi; const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - struct hs_scratch *scratch = tctxtToScratch(tctxt); - char *state = scratch->core_info.state; + char *state = ci->state; u8 *activeLeftArray = getActiveLeftArray(t, state); const u32 arCount = t->activeLeftCount; char alive = mmbit_set(activeLeftArray, arCount, ri); @@ -529,12 +523,12 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, if (cancel) { DEBUG_PRINTF("dominating top: (re)init\n"); fatbit_set(aqa, qCount, qi); - initRoseQueue(t, qi, left, tctxt); + initRoseQueue(t, qi, left, scratch); pushQueueAt(q, 0, MQE_START, loc); nfaQueueInitState(q->nfa, q); } else if (!fatbit_set(aqa, qCount, qi)) { DEBUG_PRINTF("initing %u\n", qi); - initRoseQueue(t, qi, left, tctxt); + initRoseQueue(t, qi, left, scratch); if (alive) { s32 sp = -(s32)loadRoseDelay(t, state, left); pushQueueAt(q, 0, MQE_START, sp); @@ -590,17 +584,15 @@ hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, * up */ static rose_inline hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, - char *state, ReportID r, u64a end, - struct RoseContext *tctxt, + struct hs_scratch *scratch, + ReportID r, u64a end, char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - if (roseCatchUpMpvFeeders(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpMpvFeeders(t, scratch, end, in_anchored) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0); + return 
roseHandleChainMatch(t, scratch, r, end, in_anchored, 0); } static really_inline @@ -667,13 +659,13 @@ int reachHasBit(const u8 *reach, u8 c) { * are satisfied. */ static rose_inline -int roseCheckLookaround(const struct RoseEngine *t, u32 lookaroundIndex, - u32 lookaroundCount, u64a end, - struct RoseContext *tctxt) { +int roseCheckLookaround(const struct RoseEngine *t, + const struct hs_scratch *scratch, u32 lookaroundIndex, + u32 lookaroundCount, u64a end) { assert(lookaroundIndex != MO_INVALID_IDX); assert(lookaroundCount > 0); - const struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + const struct core_info *ci = &scratch->core_info; DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, ci->buf_offset, ci->buf_offset + ci->len); @@ -765,9 +757,8 @@ int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, } static rose_inline -u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, - UNUSED const u32 leftfixLag, - struct RoseContext *tctxt) { +u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch, + const u32 qi, UNUSED const u32 leftfixLag) { u32 ri = queueToLeftIndex(t, qi); UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; @@ -778,7 +769,7 @@ u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, assert(leftfixLag <= left->maxLag); - struct mq *q = tctxtToScratch(tctxt)->queues + qi; + struct mq *q = scratch->queues + qi; u64a start = ~0ULL; @@ -816,9 +807,9 @@ char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { } static rose_inline -hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, - u64a end, size_t match_len, - struct RoseContext *tctxt, char in_anchored) { +hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 programOffset, + u64a end, size_t match_len, char in_anchored) { DEBUG_PRINTF("program begins at offset %u\n", programOffset); assert(programOffset); @@ -837,7 +828,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, // allow the program to squash groups). 
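    /* The interpreter below is a straightforward opcode loop: each
     * PROGRAM_CASE(...) block handles one ROSE_INSTR_* instruction and
     * either falls through via PROGRAM_NEXT_INSTRUCTION or, when a CHECK_*
     * instruction fails, advances pc by that instruction's fail_jump offset
     * and continues. */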
int work_done = 0; - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct RoseContext *tctxt = &scratch->tctxt; assert(*(const u8 *)pc != ROSE_INSTR_END); @@ -922,7 +913,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LOOKAROUND) { - if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) { + if (!roseCheckLookaround(t, scratch, ri->index, ri->count, + end)) { DEBUG_PRINTF("failed lookaround check\n"); assert(ri->fail_jump); // must progress pc += ri->fail_jump; @@ -932,9 +924,9 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LEFTFIX) { - if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end, - tctxt)) { - DEBUG_PRINTF("failed lookaround check\n"); + if (!roseTestLeftfix(t, scratch, ri->queue, ri->lag, ri->report, + end)) { + DEBUG_PRINTF("failed leftfix check\n"); assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; @@ -943,13 +935,13 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(PUSH_DELAYED) { - rosePushDelayedMatch(t, ri->delay, ri->index, end, tctxt); + rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CATCH_UP) { - if (roseCatchUpTo(t, scratch->core_info.state, end, scratch, - in_anchored) == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpTo(t, scratch, end, in_anchored) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } @@ -963,7 +955,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SOM_LEFTFIX) { - som = roseGetHaigSom(t, ri->queue, ri->lag, tctxt); + som = roseGetHaigSom(t, scratch, ri->queue, ri->lag); DEBUG_PRINTF("som from leftfix is %llu\n", som); } PROGRAM_NEXT_INSTRUCTION @@ -983,16 +975,16 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(TRIGGER_INFIX) { - roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel, - tctxt); + roseTriggerInfix(t, scratch, som, end, ri->queue, ri->event, + ri->cancel); work_done = 1; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(TRIGGER_SUFFIX) { - if (roseHandleSuffixTrigger(t, ri->queue, ri->event, som, end, - tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { + if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som, + end, in_anchored) + == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1037,9 +1029,9 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_CHAIN) { - if (roseCatchUpAndHandleChainMatch( - t, scratch->core_info.state, ri->report, end, - tctxt, in_anchored) == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpAndHandleChainMatch(t, scratch, ri->report, end, + in_anchored) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; diff --git a/src/rose/stream.c b/src/rose/stream.c index 9b7394896..bd3021454 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -190,7 +190,7 @@ enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, char *state, miracle_loc); if (!q_active) { fatbit_set(scratch->aqa, qCount, qi); - initRoseQueue(t, qi, left, &scratch->tctxt); + initRoseQueue(t, qi, left, scratch); } q->cur = q->end = 0; pushQueueAt(q, 0, MQE_START, miracle_loc); @@ -236,7 +236,7 @@ char roseCatchUpLeftfix(const struct RoseEngine *t, char *state, } 
if (!fatbit_set(scratch->aqa, qCount, qi)) { - initRoseQueue(t, qi, left, &scratch->tctxt); + initRoseQueue(t, qi, left, scratch); s32 sp; if (ci->buf_offset) { @@ -396,14 +396,13 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, u64a offset) { struct RoseContext *tctxt = &scratch->tctxt; - if (roseCatchUpTo(t, state, length + scratch->core_info.buf_offset, scratch, - 0) - == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpTo(t, scratch, length + scratch->core_info.buf_offset, 0) == + HWLM_TERMINATE_MATCHING) { return; /* dead; no need to clean up state. */ } roseSaveNfaStreamState(t, state, scratch); roseCatchUpLeftfixes(t, state, scratch); - roseFlushLastByteHistory(t, state, offset + length, tctxt); + roseFlushLastByteHistory(t, scratch, offset + length); tctxt->lastEndOffset = offset + length; storeGroups(t, state, tctxt->groups); } @@ -550,7 +549,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, flush_delay_and_exit: DEBUG_PRINTF("flushing floating\n"); - if (cleanUpDelayed(length, offset, scratch) == HWLM_TERMINATE_MATCHING) { + if (cleanUpDelayed(t, scratch, length, offset) == HWLM_TERMINATE_MATCHING) { return; } From cd133f77eedeca8c507ead6e49d53fed7791354b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 8 Feb 2016 10:21:17 +1100 Subject: [PATCH 064/218] DEDUPE instr: generate only when necessary --- src/rose/rose_build_bytecode.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c067b6a39..81b28197d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3084,7 +3084,11 @@ void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, switch (report.type) { case EXTERNAL_CALLBACK: if (!has_som) { - makeDedupe(id, report_block); + // Dedupe is only necessary if this report has a dkey, or if there + // are SOM reports to catch up. + if (build.rm.getDkey(report) != ~0U || build.hasSom) { + makeDedupe(id, report_block); + } if (report.ekey == INVALID_EKEY) { report_block.emplace_back(ROSE_INSTR_REPORT); report_block.back().u.report.report = id; From 314da68085f393608dfd967fef7ed3f2f433009a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 8 Feb 2016 16:04:41 +1100 Subject: [PATCH 065/218] dedupeCatchup: only call when necessary at runtime --- src/report.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/report.h b/src/report.h index b5d9af036..69497da87 100644 --- a/src/report.h +++ b/src/report.h @@ -76,10 +76,9 @@ enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, from_offset, to_offset, ri->dkey, do_som); DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom); const u32 dkey = ri->dkey; - if (!do_som && dkey == MO_INVALID_IDX) { - DEBUG_PRINTF("nothing to do\n"); - return DEDUPE_CONTINUE; - } + + // We should not have been called if there's no dedupe work to do. 
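    // The guard has moved to the call site: roseAdaptor_i below now invokes
    // dedupeCatchup() only when do_som is set or ri->dkey is valid, so the
    // common no-dedupe case never pays for this call at all.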
+ assert(do_som || dkey != MO_INVALID_IDX); struct match_deduper *deduper = &scratch->deduper; if (offset != deduper->current_report_offset) { @@ -272,17 +271,19 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, int halt = 0; - enum DedupeResult dedupe_rv = dedupeCatchup(rose, ri, scratch, offset, + if (do_som || ri->dkey != MO_INVALID_IDX) { + enum DedupeResult dedupe_rv = dedupeCatchup(rose, ri, scratch, offset, from_offset, to_offset, do_som); - switch (dedupe_rv) { - case DEDUPE_HALT: - halt = 1; - goto exit; - case DEDUPE_SKIP: - halt = 0; - goto exit; - case DEDUPE_CONTINUE: - break; + switch (dedupe_rv) { + case DEDUPE_HALT: + halt = 1; + goto exit; + case DEDUPE_SKIP: + halt = 0; + goto exit; + case DEDUPE_CONTINUE: + break; + } } halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, From 961e303ff344fa766536491b17bf217efffcd5b3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 8 Feb 2016 13:32:21 +1100 Subject: [PATCH 066/218] SET_GROUPS instr: don't generate more than one --- src/rose/rose_build_bytecode.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 81b28197d..558603f70 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3269,10 +3269,6 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, sort(begin(infix_program), end(infix_program)); unique_copy(begin(infix_program), end(infix_program), back_inserter(program)); - - // Groups may be cleared by an infix going quiet. Set groups immediately - // after infixes are triggered. - makeRoleGroups(g[u].groups, program); } static @@ -3373,9 +3369,13 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, makeRoleReports(build, bc, v, program); makeRoleInfixTriggers(build, bc, v, program); + + // Note: SET_GROUPS instruction must be after infix triggers, as an infix + // going dead may switch off groups. + makeRoleGroups(g[v].groups, program); + makeRoleSuffix(build, bc, v, program); makeRoleSetState(bc, v, program); - makeRoleGroups(g[v].groups, program); return program; } From 42d34f19d15272d6d28fff0ffe4cb8da90ee4a64 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 10 Feb 2016 15:33:48 +1100 Subject: [PATCH 067/218] Dump: don't call dumpNfaNotes for SOM reverse NFAs These NFAs have no queue index. 
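(They are run directly via nfaBlockExecReverse() from the SOM runtime's
runRevNfa(), rather than being scheduled on a queue, so the queue-derived
notes printed by dumpNfaNotes do not apply to them.)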
--- src/rose/rose_dump.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index e803b8c45..25c6e77da 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -679,7 +679,7 @@ void dumpRevComponentInfo(const RoseEngine *t, const string &base) { ss << base << "som_rev_components.txt"; ofstream fout(ss.str().c_str()); - fout << "Index Offset\tEngine \tStates S.State Bytes Notes\n"; + fout << "Index Offset\tEngine \tStates S.State Bytes\n"; const char *tp = (const char *)t; const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); @@ -696,17 +696,13 @@ void dumpRevComponentInfo(const RoseEngine *t, const string &base) { fout << left << setw(6) << n->nPositions << " "; fout << left << setw(7) << n->streamStateSize << " "; - fout << left << setw(7) << n->length << " "; - - dumpNfaNotes(fout, t, n); - + fout << left << setw(7) << n->length; fout << endl; } } static void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { - const char *tp = (const char *)t; const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); From 670eff5bc0ce7be41b563578a491a0fa58ff37b7 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 10 Feb 2016 15:14:49 +1100 Subject: [PATCH 068/218] NFA merging: permit different reports For cases where the edges from start are not to a mix of accept and acceptEod, report sets can be combined. --- src/nfagraph/ng_uncalc_components.cpp | 82 ++++++++++++--------------- 1 file changed, 36 insertions(+), 46 deletions(-) diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index 47e3ff088..abba09f93 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ b/src/nfagraph/ng_uncalc_components.cpp @@ -486,53 +486,49 @@ void buildNfaMergeQueue(const vector &cluster, } } -/** True if the graphs have compatible starts for merging, i.e. they are NOT - * both vacuous with different reports on the starts. */ +/** + * True if the graphs have mergeable starts. + * + * Nowadays, this means that any vacuous edges must have the same tops. In + * addition, mixed-accept cases need to have matching reports. + */ static bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { - bool vac1 = isVacuous(h1), vac2 = isVacuous(h2); + if (!isVacuous(h1) || !isVacuous(h2)) { + return true; + } - // Safety tests: reports should be empty on non-vacuous graphs. - if (!vac1) { - assert(h1[h1.start].reports.empty()); - assert(h1[h1.startDs].reports.empty()); + // Vacuous edges from startDs should not occur: we have better ways to + // implement true dot-star relationships. Just in case they do, ban them + // from being merged unless they have identical reports. + if (is_match_vertex(h1.startDs, h1) || is_match_vertex(h2.startDs, h2)) { + assert(0); + return false; } - if (!vac2) { - assert(h2[h2.start].reports.empty()); - assert(h2[h2.startDs].reports.empty()); + + // If both graphs have edge (start, accept), the tops must match. + auto e1_accept = edge(h1.start, h1.accept, h1); + auto e2_accept = edge(h2.start, h2.accept, h2); + if (e1_accept.second && e2_accept.second && + h1[e1_accept.first].top != h2[e2_accept.first].top) { + return false; } - if (vac1 && vac2) { - // Graphs must have the same reports on their starts to be mergeable - // (and top on start->accept). - if (h1[h1.start].reports - != h2[h2.start].reports) { - return false; - } + // If both graphs have edge (start, acceptEod), the tops must match. 
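    // (A top is the trigger event used to switch the NFA on; if vacuous
    // edges carried different tops, the merged graph could raise one
    // pattern's report when only the other pattern's trigger had fired.)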
+ auto e1_eod = edge(h1.start, h1.acceptEod, h1); + auto e2_eod = edge(h2.start, h2.acceptEod, h2); + if (e1_eod.second && e2_eod.second && + h1[e1_eod.first].top != h2[e2_eod.first].top) { + return false; + } - if (h1[h1.startDs].reports - != h2[h2.startDs].reports) { + // If one graph has an edge to accept and the other has an edge to + // acceptEod, the reports must match for the merge to be safe. + if ((e1_accept.second && e2_eod.second) || + (e2_accept.second && e1_eod.second)) { + if (h1[h1.start].reports != h2[h2.start].reports) { return false; } - - pair e1, e2; - e1 = edge(h1.start, h1.accept, h1); - e2 = edge(h2.start, h2.accept, h2); - if (e1.second || e2.second) { - if (e1.second && e2.second && - h1[e1.first].top != h2[e2.first].top) { - return false; - } - } - - e1 = edge(h1.start, h1.acceptEod, h1); - e2 = edge(h2.start, h2.acceptEod, h2); - if (e1.second || e2.second) { - if (e1.second && e2.second && - h1[e1.first].top != h2[e2.first].top) { - return false; - } - } } return true; @@ -545,20 +541,14 @@ bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, auto a_state_ids = numberStates(ga); auto b_state_ids = numberStates(gb); - // At the moment, since our vertices can only have one report ID each, - // we must ensure that our start vertices have the same report ID, - // otherwise they can't be merged. This happens in vacuous NFAs, used - // by Rose. - // XXX: the multi-top code has this limitation, too. + // Vacuous NFAs require special checks on their starts to ensure that tops + // match, and that reports match for mixed-accept cases. if (!mergeableStarts(ga, gb)) { DEBUG_PRINTF("starts aren't mergeable\n"); return false; } - // NOTE: states must be numbered already. - u32 cpl = commonPrefixLength(ga, a_state_ids, gb, b_state_ids); - if (!shouldMerge(gb, b_state_ids, ga, a_state_ids, cpl, rm, cc)) { return false; } From 7b54856642afcd061a406256472074d8b72c36ab Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 9 Feb 2016 10:01:53 +1100 Subject: [PATCH 069/218] Rose: allow block-mode merge of small prefixes Previously, we disallowed the merging of all Rose prefixes in block mode where the literal sets are not identical. This change allows merging if the prefix graphs to be merged are very small, as a small performance improvement for cases with lots of tiny prefixes. This check is deliberately conservative: graphs must have some common vertices, and the result of the merge must not give up any accelerability. --- src/nfa/limex_compile.cpp | 8 +-- src/nfa/limex_compile.h | 9 ++- src/rose/rose_build_merge.cpp | 132 ++++++++++++++++++++++++++-------- 3 files changed, 112 insertions(+), 37 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index dc372860f..7fa01d8a9 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -2187,7 +2187,7 @@ u32 countAccelStates(NGHolder &h, if (!cc.grey.allowLimExNFA) { DEBUG_PRINTF("limex not allowed\n"); - return NFA_MAX_ACCEL_STATES + 1; + return 0; } // Sanity check the input data. @@ -2201,11 +2201,11 @@ u32 countAccelStates(NGHolder &h, do_accel, state_compression, cc, num_states); // Acceleration analysis. 
- fillAccelInfo(bi); + nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map); - u32 num_accel = verify_u32(bi.accel.accelerable.size()); + u32 num_accel = verify_u32(bi.accel.accel_map.size()); DEBUG_PRINTF("found %u accel states\n", num_accel); - return min(num_accel, (u32)NFA_MAX_ACCEL_STATES); + return num_accel; } } // namespace ue2 diff --git a/src/nfa/limex_compile.h b/src/nfa/limex_compile.h index 0e3fdea04..62a07e10a 100644 --- a/src/nfa/limex_compile.h +++ b/src/nfa/limex_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -79,11 +79,10 @@ aligned_unique_ptr generate(NGHolder &g, const CompileContext &cc); /** - * \brief For a given graph, count the number of accel states it will have in - * an implementation. + * \brief For a given graph, count the number of accelerable states it has. * - * \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an - * implementation would not be constructible. + * Note that this number may be greater than the number that are actually + * implementable. */ u32 countAccelStates(NGHolder &h, const ue2::unordered_map &states, diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index e89a17728..5b992fcb4 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -105,6 +105,10 @@ static const size_t DFA_MERGE_MAX_STATES = 8000; * merging with other graphs. */ static const size_t LARGE_LBR_MIN_VERTICES = 32; +/** \brief In block mode, merge two prefixes even if they don't have identical + * literal sets if they have fewer than this many states and the merged graph + * is also small. */ +static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32; static size_t small_merge_max_vertices(const CompileContext &cc) { @@ -930,6 +934,99 @@ bool compatibleLiteralsForMerge( return true; } +/** + * True if this graph has few enough accel states to be implemented as an NFA + * with all of those states actually becoming accel schemes. + */ +static +bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) { + u32 num = countAccelStates(g, &build.rm, build.cc); + DEBUG_PRINTF("graph with %zu vertices has %u accel states\n", + num_vertices(g), num); + return num <= NFA_MAX_ACCEL_STATES; +} + +/** + * In block mode, we want to be a little more selective, We will only merge + * prefix engines when the literal sets are the same, or if the merged graph + * has only grown by a small amount. + */ +static +bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u, + RoseVertex v) { + assert(!build.cc.streaming); + assert(build.isRootSuccessor(u) == build.isRootSuccessor(v)); + + // Always merge infixes if we can (subject to the other criteria in + // mergeableRoseVertices). + if (!build.isRootSuccessor(u)) { + return true; + } + + const RoseGraph &g = build.g; + + // Merge prefixes with identical literal sets (as we'd have to run them + // both when we see those literals anyway). 
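    // Worked example of the growth bound applied further down: with
    // u_count = 12 and v_count = 20, min_size = 20, max_size = 32 and
    // max_growth = ((32 - 20) * 25) / 100 = 3, so the merged graph passes
    // only if it has at most 23 vertices.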
+ if (g[u].literals == g[v].literals) { + return true; + } + + // The rest of this function only deals with the case when both vertices + // have graph leftfixes. + if (!g[u].left.graph || !g[v].left.graph) { + return false; + } + + const size_t u_count = num_vertices(*g[u].left.graph); + const size_t v_count = num_vertices(*g[v].left.graph); + DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", + u_count, v_count); + if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || + v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("prefixes too big already\n"); + return false; + } + + DEBUG_PRINTF("trying merge\n"); + NGHolder h; + cloneHolder(h, *g[v].left.graph); + if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) { + DEBUG_PRINTF("couldn't merge\n"); + return false; + } + + const size_t merged_count = num_vertices(h); + DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); + if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("exceeded limit\n"); + return false; + } + + // We want to only perform merges that take advantage of some + // commonality in the two input graphs, so we check that the number of + // vertices has only grown a small amount: somewhere between the sum + // (no commonality) and the max (no growth at all) of the vertex counts + // of the input graphs. + const size_t max_size = u_count + v_count; + const size_t min_size = max(u_count, v_count); + const size_t max_growth = ((max_size - min_size) * 25) / 100; + if (merged_count > min_size + max_growth) { + DEBUG_PRINTF("grew too much\n"); + return false; + } + + // We don't want to squander any chances at accelerating. + if (!isAccelerableLeftfix(build, h) && + (isAccelerableLeftfix(build, *g[u].left.graph) || + isAccelerableLeftfix(build, *g[v].left.graph))) { + DEBUG_PRINTF("would lose accel property\n"); + return false; + } + + DEBUG_PRINTF("safe to merge\n"); + return true; +} + bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, RoseVertex v) { assert(u != v); @@ -938,15 +1035,8 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, return false; } - // UE-1675: in block mode, we want to be a little more selective -- only - // merge prefix roses when the literal sets are the same. - if (!tbi.cc.streaming && tbi.isRootSuccessor(u)) { - assert(tbi.isRootSuccessor(v)); - - if (tbi.g[u].literals != tbi.g[v].literals) { - DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); - return false; - } + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) { + return false; } /* We cannot merge prefixes/vertices if they are successors of different @@ -1102,15 +1192,8 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, vector> ulits; /* lit + lag pairs */ for (auto a : verts1) { - // UE-1675: in block mode, we want to be a little more selective -- - // only merge prefix roses when the literal sets are the same. - if (!tbi.cc.streaming && is_prefix) { - assert(tbi.isRootSuccessor(a)); - - if (tbi.g[u_front].literals != tbi.g[a].literals) { - DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); - return false; - } + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { + return false; } u32 ulag = tbi.g[a].left.lag; @@ -1121,15 +1204,8 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, vector> vlits; for (auto a : verts2) { - // UE-1675: in block mode, we want to be a little more selective -- - // only merge prefix roses when the literal sets are the same. 
- if (!tbi.cc.streaming && is_prefix) { - assert(tbi.isRootSuccessor(a)); - - if (tbi.g[u_front].literals != tbi.g[a].literals) { - DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); - return false; - } + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { + return false; } u32 vlag = tbi.g[a].left.lag; From 3e002f8181849358a07e6db9e6fe2838a3df9cac Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 16:40:16 +1100 Subject: [PATCH 070/218] NFA: Move NFAContext to stack (from scratch) --- src/nfa/limex_runtime_impl.h | 126 +++++++++++++++++------------------ src/scratch.c | 11 --- src/scratch.h | 2 - unit/internal/limex_nfa.cpp | 3 - 4 files changed, 60 insertions(+), 82 deletions(-) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 6ef3bae98..e362baef9 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -176,8 +176,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); const u32 *exceptionMap = limex->exceptionMap; - assert(ISALIGNED_CL(ctx)); - assert(ISALIGNED_CL(&ctx->s)); STATE_T s = LOAD_STATE(&ctx->s); /* assert(ISALIGNED_16(exceptions)); */ @@ -533,17 +531,16 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { assert(q->cur + 1 < q->end); /* require at least two items */ - struct CONTEXT_T *ctx = q->scratch->nfaContext; - assert(ISALIGNED_CL(ctx)); - ctx->repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - ctx->repeat_state = q->streamState + limex->stateSize; - ctx->callback = q->cb; - ctx->context = q->context; - STORE_STATE(&ctx->cached_estate, ZERO_STATE); + struct CONTEXT_T ctx; + ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); + ctx.repeat_state = q->streamState + limex->stateSize; + ctx.callback = q->cb; + ctx.context = q->context; + STORE_STATE(&ctx.cached_estate, ZERO_STATE); assert(q->items[q->cur].location >= 0); DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx->s, LOAD_STATE(q->state)); + STORE_STATE(&ctx.s, LOAD_STATE(q->state)); assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -565,7 +562,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { /* do main buffer region */ DEBUG_PRINTF("MAIN BUFFER SCAN\n"); assert(ep - offset <= q->length); - if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, ctx, sp) + if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp) == MO_HALT_MATCHING) { STORE_STATE(q->state, ZERO_STATE); return 0; @@ -584,19 +581,19 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - STORE_STATE(q->state, LOAD_STATE(&ctx->s)); + STORE_STATE(q->state, LOAD_STATE(&ctx.s)); return MO_ALIVE; } - JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, ctx, sp); + JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); q->cur++; } - EXPIRE_ESTATE_FN(limex, ctx, sp); + EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - STORE_STATE(q->state, LOAD_STATE(&ctx->s)); + STORE_STATE(q->state, 
LOAD_STATE(&ctx.s)); if (q->cur != q->end) { q->cur--; @@ -605,7 +602,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(LOAD_STATE(&ctx->s)); + return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); } /* used by suffix execution in Rose */ @@ -628,16 +625,15 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { assert(q->cur + 1 < q->end); /* require at least two items */ - struct CONTEXT_T *ctx = q->scratch->nfaContext; - assert(ISALIGNED_CL(ctx)); - ctx->repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - ctx->repeat_state = q->streamState + limex->stateSize; - ctx->callback = q->cb; - ctx->context = q->context; - STORE_STATE(&ctx->cached_estate, ZERO_STATE); + struct CONTEXT_T ctx; + ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); + ctx.repeat_state = q->streamState + limex->stateSize; + ctx.callback = q->cb; + ctx.context = q->context; + STORE_STATE(&ctx.cached_estate, ZERO_STATE); DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx->s, LOAD_STATE(q->state)); + STORE_STATE(&ctx.s, LOAD_STATE(q->state)); assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -661,7 +657,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { /* do main buffer region */ u64a final_look = 0; assert(ep - offset <= q->length); - if (STREAMFIRST_FN(limex, q->buffer + sp - offset, ep - sp, ctx, sp, + if (STREAMFIRST_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp, &final_look) == MO_HALT_MATCHING) { DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu offset:%llu\n", final_look, sp, end_abs, offset); @@ -669,7 +665,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; - STORE_STATE(q->state, LOAD_STATE(&ctx->s)); + STORE_STATE(q->state, LOAD_STATE(&ctx.s)); return MO_MATCHES_PENDING; } @@ -685,19 +681,19 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - STORE_STATE(q->state, LOAD_STATE(&ctx->s)); + STORE_STATE(q->state, LOAD_STATE(&ctx.s)); return MO_ALIVE; } - JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, ctx, sp); + JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); q->cur++; } - EXPIRE_ESTATE_FN(limex, ctx, sp); + EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - STORE_STATE(q->state, LOAD_STATE(&ctx->s)); + STORE_STATE(q->state, LOAD_STATE(&ctx.s)); if (q->cur != q->end) { q->cur--; @@ -706,7 +702,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(LOAD_STATE(&ctx->s)); + return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); } // Used for execution Rose prefix/infixes. 
@@ -720,15 +716,15 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, assert(q->cur + 1 < q->end); /* require at least two items */ - struct CONTEXT_T *ctx = q->scratch->nfaContext; - ctx->repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - ctx->repeat_state = q->streamState + limex->stateSize; - ctx->callback = NULL; - ctx->context = NULL; - STORE_STATE(&ctx->cached_estate, ZERO_STATE); + struct CONTEXT_T ctx; + ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); + ctx.repeat_state = q->streamState + limex->stateSize; + ctx.callback = NULL; + ctx.context = NULL; + STORE_STATE(&ctx.cached_estate, ZERO_STATE); DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx->s, LOAD_STATE(q->state)); + STORE_STATE(&ctx.s, LOAD_STATE(q->state)); assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -740,7 +736,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, if (n->maxWidth) { if (ep - sp > n->maxWidth) { sp = ep - n->maxWidth; - STORE_STATE(&ctx->s, INITIAL_FN(limex, !!sp)); + STORE_STATE(&ctx.s, INITIAL_FN(limex, !!sp)); } } assert(ep >= sp); @@ -751,7 +747,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, u64a local_ep = MIN(offset, ep); /* we are starting inside the history buffer */ STREAMSILENT_FN(limex, q->history + q->hlength + sp - offset, - local_ep - sp, ctx, sp); + local_ep - sp, &ctx, sp); sp = local_ep; } @@ -763,30 +759,30 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, /* do main buffer region */ DEBUG_PRINTF("MAIN BUFFER SCAN\n"); assert(ep - offset <= q->length); - STREAMSILENT_FN(limex, q->buffer + sp - offset, ep - sp, ctx, sp); + STREAMSILENT_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp); DEBUG_PRINTF("SCAN DONE\n"); scan_done: sp = ep; - JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, ctx, sp); + JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); q->cur++; } - EXPIRE_ESTATE_FN(limex, ctx, sp); + EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END, nfa is %s\n", - ISNONZERO_STATE(ctx->s) ? "still alive" : "dead"); + ISNONZERO_STATE(ctx.s) ? "still alive" : "dead"); - STORE_STATE(q->state, LOAD_STATE(&ctx->s)); + STORE_STATE(q->state, LOAD_STATE(&ctx.s)); - if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx->s), ctx->repeat_ctrl, - ctx->repeat_state, sp + 1, report)) { + if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx.s), ctx.repeat_ctrl, + ctx.repeat_state, sp + 1, report)) { return MO_MATCHES_PENDING; } - return ISNONZERO_STATE(LOAD_STATE(&ctx->s)); + return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); } char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, @@ -813,42 +809,40 @@ char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { // Block mode reverse scan. 
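/* As with the _Q/_Q2/_QR paths above, the NFAContext here becomes a plain
 * stack local rather than the separately-allocated nfaContextSom slot in
 * scratch, which is why the scratch argument below is now UNUSED. */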
char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, - const u8 *buf, size_t buflen, - const u8 *hbuf, size_t hlen, - struct hs_scratch *scratch, - NfaCallback cb, void *context) { + const u8 *buf, size_t buflen, + const u8 *hbuf, size_t hlen, + UNUSED struct hs_scratch *scratch, + NfaCallback cb, void *context) { assert(buf || hbuf); assert(buflen || hlen); - /* This may be called INSIDE another NFA, so we need a separate - * context --> Hence the nfaContextSom */ - struct CONTEXT_T *ctx = scratch->nfaContextSom; - ctx->repeat_ctrl = NULL; - ctx->repeat_state = NULL; - ctx->callback = cb; - ctx->context = context; - STORE_STATE(&ctx->cached_estate, ZERO_STATE); + struct CONTEXT_T ctx; + ctx.repeat_ctrl = NULL; + ctx.repeat_state = NULL; + ctx.callback = cb; + ctx.context = context; + STORE_STATE(&ctx.cached_estate, ZERO_STATE); const IMPL_NFA_T *limex = getImplNfa(n); - STORE_STATE(&ctx->s, INITIAL_FN(limex, 0)); // always anchored + STORE_STATE(&ctx.s, INITIAL_FN(limex, 0)); // always anchored // 'buf' may be null, for example when we're scanning at EOD time. if (buflen) { assert(buf); DEBUG_PRINTF("MAIN BUFFER SCAN, %zu bytes\n", buflen); offset -= buflen; - REV_STREAM_FN(limex, buf, buflen, ctx, offset); + REV_STREAM_FN(limex, buf, buflen, &ctx, offset); } if (hlen) { assert(hbuf); DEBUG_PRINTF("HISTORY BUFFER SCAN, %zu bytes\n", hlen); offset -= hlen; - REV_STREAM_FN(limex, hbuf, hlen, ctx, offset); + REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset); } - if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx->s))) { - TESTEOD_REV_FN(limex, &ctx->s, offset, cb, context); + if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx.s))) { + TESTEOD_REV_FN(limex, &ctx.s, offset, cb, context); } // NOTE: return value is unused. diff --git a/src/scratch.c b/src/scratch.c index 35d09bbe2..5771c8a70 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -40,7 +40,6 @@ #include "state.h" #include "ue2common.h" #include "database.h" -#include "nfa/limex_context.h" // for NFAContext128 etc #include "nfa/nfa_api_queue.h" #include "rose/rose_internal.h" #include "util/fatbit.h" @@ -101,13 +100,10 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { size_t delay_region_size = fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_count); - size_t nfa_context_size = 2 * sizeof(struct NFAContext512) + 127; - // the size is all the allocated stuff, not including the struct itself size_t size = queue_size + 63 + bStateSize + tStateSize + fullStateSize + 63 /* cacheline padding */ - + nfa_context_size + fatbit_size(proto->handledKeyCount) /* handled roles */ + fatbit_size(queueCount) /* active queue array */ + 2 * fatbit_size(deduperCount) /* need odd and even logs */ @@ -202,13 +198,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { current += tStateSize; current = ROUNDUP_PTR(current, 64); - assert(ISALIGNED_CL(current)); - s->nfaContext = current; - current += sizeof(struct NFAContext512); - current = ROUNDUP_PTR(current, 64); - assert(ISALIGNED_CL(current)); - s->nfaContextSom = current; - current += sizeof(struct NFAContext512); assert(ISALIGNED_N(current, 8)); s->deduper.som_start_log[0] = (u64a *)current; diff --git a/src/scratch.h b/src/scratch.h index e082d2f89..48b3de7d1 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -153,8 +153,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { char *bstate; /**< block mode states */ char *tstate; /**< state for transient roses */ char *qNfaState; /**< queued NFA temp state */ - void *nfaContext; /**< use for 
your NFAContextNNN struct */ - void *nfaContextSom; /**< use for your NFAContextNNN struct by som_runtime */ char *fullState; /**< uncompressed NFA state */ struct mq *queues; struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 989c7b605..47fd5d4a7 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -91,7 +91,6 @@ class LimExModelTest : public TestWithParam { // Mock up a scratch structure that contains the pieces that we need // for NFA execution. scratch = aligned_zmalloc_unique(sizeof(struct hs_scratch)); - scratch->nfaContext = nfa_context.get(); } virtual void initQueue() { @@ -339,7 +338,6 @@ class LimExReverseTest : public TestWithParam { // Mock up a scratch structure that contains the pieces that we need // for reverse NFA execution. scratch = aligned_zmalloc_unique(sizeof(struct hs_scratch)); - scratch->nfaContextSom = nfa_context.get(); } // NFA type (enum NFAEngineType) @@ -409,7 +407,6 @@ class LimExZombieTest : public TestWithParam { // Mock up a scratch structure that contains the pieces that we need // for NFA execution. scratch = aligned_zmalloc_unique(sizeof(struct hs_scratch)); - scratch->nfaContext = nfa_context.get(); } virtual void initQueue() { From 58f9617f66447a9ea28d2b9283316c68cf89a0f6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 16:46:15 +1100 Subject: [PATCH 071/218] NFA API: Remove nfaBlockExecReverse scratch arg Scratch is no longer used by this function's implementations. --- src/nfa/limex.h | 9 ++++----- src/nfa/limex_runtime_impl.h | 1 - src/nfa/nfa_api.h | 7 +------ src/nfa/nfa_api_dispatch.c | 7 +++---- src/som/som_runtime.c | 2 +- unit/internal/limex_nfa.cpp | 4 ++-- 6 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/nfa/limex.h b/src/nfa/limex.h index 3f1e49a3c..2c429a677 100644 --- a/src/nfa/limex.h +++ b/src/nfa/limex.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,14 +65,13 @@ extern "C" void *state, u8 key); \ char gf_name##_B_Reverse(const struct NFA *n, u64a offset, const u8 *buf, \ size_t buflen, const u8 *hbuf, size_t hlen, \ - struct hs_scratch *scratch, NfaCallback cb, \ - void *context); \ + NfaCallback cb, void *context); \ char gf_name##_queueCompressState(const struct NFA *nfa, \ const struct mq *q, s64a loc); \ char gf_name##_expandState(const struct NFA *nfa, void *dest, \ const void *src, u64a offset, u8 key); \ - enum nfa_zombie_status gf_name##_zombie_status(const struct NFA *nfa, \ - struct mq *q, s64a loc); \ + enum nfa_zombie_status gf_name##_zombie_status(const struct NFA *nfa, \ + struct mq *q, s64a loc); \ GENERATE_NFA_DUMP_DECL(gf_name) GENERATE_NFA_DECL(nfaExecLimEx32_1) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index e362baef9..011913f94 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -811,7 +811,6 @@ char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, const u8 *buf, size_t buflen, const u8 *hbuf, size_t hlen, - UNUSED struct hs_scratch *scratch, NfaCallback cb, void *context) { assert(buf || hbuf); assert(buflen || hlen); diff --git a/src/nfa/nfa_api.h b/src/nfa/nfa_api.h index 
84a5417b5..84f5c4a01 100644 --- a/src/nfa/nfa_api.h +++ b/src/nfa/nfa_api.h @@ -44,7 +44,6 @@ extern "C" #include "callback.h" #include "ue2common.h" -struct hs_scratch; struct mq; struct NFA; @@ -212,16 +211,12 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report); * @param buflen length of buf * @param hbuf history buf * @param hlen length of hbuf - * @param scratch scratch * @param callback the callback to call for each match raised * @param context context pointer passed to each callback - * - * Note: is NOT reentrant */ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, size_t buflen, const u8 *hbuf, size_t hlen, - struct hs_scratch *scratch, NfaCallback callback, - void *context); + NfaCallback callback, void *context); /** * Check whether the given NFA's state indicates that it is in one or more diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index 0c765b36b..fb27e4ebb 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -332,13 +332,12 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) { char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, size_t buflen, const u8 *hbuf, size_t hlen, - struct hs_scratch *scratch, NfaCallback callback, - void *context) { + NfaCallback callback, void *context) { assert(nfa); assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); DISPATCH_BY_NFA_TYPE(_B_Reverse(nfa, offset, buf, buflen, hbuf, hlen, - scratch, callback, context)); + callback, context)); return 0; } diff --git a/src/som/som_runtime.c b/src/som/som_runtime.c index 84eeb6013..fa9965157 100644 --- a/src/som/som_runtime.c +++ b/src/som/som_runtime.c @@ -151,7 +151,7 @@ void runRevNfa(struct hs_scratch *scratch, const struct internal_report *ri, *from_offset = to_offset; nfaBlockExecReverse(nfa, to_offset, buf, buf_bytes, hbuf, history_bytes, - scratch, somRevCallback, from_offset); + somRevCallback, from_offset); assert(*from_offset <= to_offset); } diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 47fd5d4a7..679dc6706 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -369,7 +369,7 @@ TEST_P(LimExReverseTest, BlockExecReverse) { const size_t hlen = 0; nfaBlockExecReverse(nfa.get(), offset, buf, buflen, hbuf, hlen, - scratch.get(), onMatch, &matches); + onMatch, &matches); ASSERT_EQ(3, matches); } From c3860a9f29db08083b8addf9efb23bd01f94bb99 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 16:51:59 +1100 Subject: [PATCH 072/218] NFA API: Remove unused scratch ptr from struct mq --- src/nfa/nfa_api_queue.h | 1 - src/scratch.c | 5 ----- unit/internal/lbr.cpp | 3 +-- unit/internal/limex_nfa.cpp | 2 -- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/nfa/nfa_api_queue.h b/src/nfa/nfa_api_queue.h index 59c18fcae..1373425d8 100644 --- a/src/nfa/nfa_api_queue.h +++ b/src/nfa/nfa_api_queue.h @@ -91,7 +91,6 @@ struct mq { * history buffer; (logically) immediately 
before the * main buffer */ size_t hlength; /**< length of the history buffer */ - struct hs_scratch *scratch; /**< global scratch space */ char report_current; /**< * report_current matches at starting offset through * callback. If true, the queue must be located at a diff --git a/src/scratch.c b/src/scratch.c index 5771c8a70..854fc3125 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -242,11 +242,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { // Don't get too big for your boots assert((size_t)(current - (char *)s) <= alloc_size); - // Init q->scratch ptr for every queue. - for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) { - qi->scratch = s; - } - return HS_SUCCESS; } diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index 006061415..be53d9f39 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -120,7 +120,6 @@ class LbrTest : public TestWithParam { q.length = 0; // filled in by test q.history = nullptr; q.hlength = 0; - q.scratch = nullptr; // not needed by LBR q.report_current = 0; q.cb = onMatch; q.som_cb = nullptr; // only used by Haig diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 679dc6706..7c0b66e4b 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -104,7 +104,6 @@ class LimExModelTest : public TestWithParam { q.length = SCAN_DATA.size(); q.history = nullptr; q.hlength = 0; - q.scratch = scratch.get(); q.report_current = 0; q.cb = onMatch; q.som_cb = nullptr; // only used by Haig @@ -420,7 +419,6 @@ class LimExZombieTest : public TestWithParam { q.length = ZOMBIE_SCAN_DATA.length(); q.history = nullptr; q.hlength = 0; - q.scratch = scratch.get(); q.report_current = 0; q.cb = onMatch; q.som_cb = nullptr; // only used by Haig From cf00094f24df551eaf9011ab5387fdca4da013e6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 12 Feb 2016 13:52:39 +1100 Subject: [PATCH 073/218] Remove more unused structures from unit tests The NFA, LBR no longer need scratch or the NFAContext structure stored outside the NFA stack. --- unit/internal/lbr.cpp | 4 ---- unit/internal/limex_nfa.cpp | 35 ----------------------------------- 2 files changed, 39 deletions(-) diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index be53d9f39..bb180e5fe 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -36,7 +36,6 @@ #include "nfa/nfa_internal.h" #include "nfa/nfa_api_util.h" #include "nfagraph/ng_lbr.h" -#include "scratch.h" #include "util/alloc.h" #include "util/compile_context.h" #include "grey.h" @@ -155,9 +154,6 @@ class LbrTest : public TestWithParam { // Space for stream state. aligned_unique_ptr stream_state; - // Space for NFAContext structure. - aligned_unique_ptr nfa_context; - // Queue structure. 
struct mq q; }; diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 7c0b66e4b..6d77fffe5 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -39,7 +39,6 @@ #include "nfa/nfa_api.h" #include "nfa/nfa_api_util.h" #include "nfa/nfa_internal.h" -#include "scratch.h" #include "util/alloc.h" #include "util/target_info.h" @@ -86,11 +85,6 @@ class LimExModelTest : public TestWithParam { full_state = aligned_zmalloc_unique(nfa->scratchStateSize); stream_state = aligned_zmalloc_unique(nfa->streamStateSize); - nfa_context = aligned_zmalloc_unique(sizeof(NFAContext512)); - - // Mock up a scratch structure that contains the pieces that we need - // for NFA execution. - scratch = aligned_zmalloc_unique(sizeof(struct hs_scratch)); } virtual void initQueue() { @@ -125,12 +119,6 @@ class LimExModelTest : public TestWithParam { // Space for stream state. aligned_unique_ptr stream_state; - // Space for NFAContext structure. - aligned_unique_ptr nfa_context; - - // Mock scratch. - aligned_unique_ptr scratch; - // Queue structure. struct mq q; }; @@ -331,12 +319,6 @@ class LimExReverseTest : public TestWithParam { nfa = constructReversedNFA(g_rev, type, cc); ASSERT_TRUE(nfa != nullptr); - - nfa_context = aligned_zmalloc_unique(sizeof(NFAContext512)); - - // Mock up a scratch structure that contains the pieces that we need - // for reverse NFA execution. - scratch = aligned_zmalloc_unique(sizeof(struct hs_scratch)); } // NFA type (enum NFAEngineType) @@ -347,12 +329,6 @@ class LimExReverseTest : public TestWithParam { // Compiled NFA structure. aligned_unique_ptr nfa; - - // Space for NFAContext structure. - aligned_unique_ptr nfa_context; - - // Mock scratch. - aligned_unique_ptr scratch; }; INSTANTIATE_TEST_CASE_P(LimExReverse, LimExReverseTest, @@ -401,11 +377,6 @@ class LimExZombieTest : public TestWithParam { full_state = aligned_zmalloc_unique(nfa->scratchStateSize); stream_state = aligned_zmalloc_unique(nfa->streamStateSize); - nfa_context = aligned_zmalloc_unique(sizeof(NFAContext512)); - - // Mock up a scratch structure that contains the pieces that we need - // for NFA execution. - scratch = aligned_zmalloc_unique(sizeof(struct hs_scratch)); } virtual void initQueue() { @@ -440,12 +411,6 @@ class LimExZombieTest : public TestWithParam { // Space for stream state. aligned_unique_ptr stream_state; - // Space for NFAContext structure. - aligned_unique_ptr nfa_context; - - // Mock scratch. - aligned_unique_ptr scratch; - // Queue structure. 
struct mq q; }; From 1619d975c6bbc0ce4cd36c95bc5b270d43f43352 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 12 Feb 2016 13:58:17 +1100 Subject: [PATCH 074/218] limex_runtime.h: scratch header no longer needed --- src/nfa/limex_runtime.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/nfa/limex_runtime.h b/src/nfa/limex_runtime.h index 31484256e..4e111aa62 100644 --- a/src/nfa/limex_runtime.h +++ b/src/nfa/limex_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,7 +42,6 @@ #include "limex_internal.h" #include "nfa_api_util.h" #include "nfa_internal.h" -#include "scratch.h" #include "util/uniform_ops.h" //////////////////////////////////////////////////////////////////////////// From 996eba9686d48b5816f9963859a8e15750c04288 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 15 Feb 2016 12:35:03 +1100 Subject: [PATCH 075/218] Add CATCH_UP to report_block, not "parent" program Also ensure that exhaustion check happens after catch up, as catch up may fire reports (which could exhaust). --- src/rose/rose_build_bytecode.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 558603f70..5354955a5 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3035,15 +3035,6 @@ void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, vector report_block; - // If this report has an exhaustion key, we can check it in the program - // rather than waiting until we're in the callback adaptor. - if (report.ekey != INVALID_EKEY) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_EXHAUSTED, - JumpTarget::NEXT_BLOCK); - ri.u.checkExhausted.ekey = report.ekey; - report_block.push_back(move(ri)); - } - // Similarly, we can handle min/max offset checks. if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, @@ -3057,7 +3048,16 @@ void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, // TODO: this could be floated in front of all the reports and only done // once. if (report.type != INTERNAL_ROSE_CHAIN) { - program.emplace_back(ROSE_INSTR_CATCH_UP); + report_block.emplace_back(ROSE_INSTR_CATCH_UP); + } + + // If this report has an exhaustion key, we can check it in the program + // rather than waiting until we're in the callback adaptor. + if (report.ekey != INVALID_EKEY) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_EXHAUSTED, + JumpTarget::NEXT_BLOCK); + ri.u.checkExhausted.ekey = report.ekey; + report_block.push_back(move(ri)); } // External SOM reports need their SOM value calculated. 
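Illustrative aside (not part of the patch): the reordering above matters because catch-up can itself fire reports, and those reports may set exhaustion keys. A toy model of the corrected ordering, with made-up names, might look like this:

#include <bitset>
#include <cstdio>

static std::bitset<64> exhausted; // one bit per exhaustion key

// Stand-in for engine catch-up: firing a queued report here exhausts
// key 7, as a real catch-up may do via its report callbacks.
static void catchUp() {
    exhausted.set(7);
}

static void fireReport(unsigned ekey) {
    catchUp(); // catch up first: this may set exhaustion keys
    if (exhausted.test(ekey)) {
        std::printf("report suppressed: ekey %u exhausted\n", ekey);
        return;
    }
    std::printf("report fired: ekey %u\n", ekey);
}

int main() {
    // Correctly suppressed only because the check runs after catch-up.
    fireReport(7);
    return 0;
}

Swapping the two steps in fireReport() reproduces the situation the patch guards against: the exhaustion check would pass before catch-up had a chance to exhaust the key.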
From fb8747295e0dd7007c33cc41bf44e76486cfc4ca Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 13:54:51 +1100 Subject: [PATCH 076/218] roseTestLeftfix: unify common "nfa is dead" code --- src/rose/program_runtime.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 7702e737b..b45607e27 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -439,9 +439,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, if (left->infix) { if (infixTooOld(q, loc)) { DEBUG_PRINTF("infix %u died of old age\n", ri); - scratch->tctxt.groups &= left->squash_mask; - mmbit_unset(activeLeftArray, arCount, ri); - return 0; + goto nfa_dead; } reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); @@ -449,9 +447,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, if (!rosePrefixCheckMiracles(t, left, ci, q, end)) { DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); - scratch->tctxt.groups &= left->squash_mask; - mmbit_unset(activeLeftArray, arCount, ri); - return 0; + goto nfa_dead; } #ifdef DEBUG @@ -463,10 +459,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); if (!rv) { /* nfa is dead */ DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); - mmbit_unset(activeLeftArray, arCount, ri); - assert(!mmbit_isset(activeLeftArray, arCount, ri)); - scratch->tctxt.groups &= left->squash_mask; - return 0; + goto nfa_dead; } // Queue must have next start loc before we call nfaInAcceptState. @@ -482,6 +475,11 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); return rv; } + +nfa_dead: + mmbit_unset(activeLeftArray, arCount, ri); + scratch->tctxt.groups &= left->squash_mask; + return 0; } static rose_inline From bba8b6263561b477a6b02e2b0ff72007a76fb0ba Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 13:55:48 +1100 Subject: [PATCH 077/218] Use rose_inline rather than really_inline --- src/rose/program_runtime.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index b45607e27..a8734cb7e 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -283,7 +283,7 @@ hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, return roseHaltIfExhausted(t, scratch); } -static really_inline +static rose_inline hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, s64a loc, char in_anchored) { @@ -355,7 +355,7 @@ hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } -static really_inline +static rose_inline char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { struct core_info *ci = &scratch->core_info; @@ -593,7 +593,7 @@ hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, return roseHandleChainMatch(t, scratch, r, end, in_anchored, 0); } -static really_inline +static rose_inline void roseHandleSom(const struct RoseEngine *t, struct hs_scratch *scratch, ReportID id, u64a end) { DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end, From 8f3471dd4e10f5b5fd8e868bc13d5d2d6dfc008b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 14:38:12 +1100 
Subject: [PATCH 078/218] Rename reduceQueue to reduceInfixQueue --- src/rose/infix.h | 5 +++-- src/rose/program_runtime.h | 6 +++--- src/rose/stream.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/rose/infix.h b/src/rose/infix.h index 785d6354d..0cc63fdf0 100644 --- a/src/rose/infix.h +++ b/src/rose/infix.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -63,12 +63,13 @@ int infixTooOld(struct mq *q, s64a curr_loc) { * engine. */ static really_inline -void reduceQueue(struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) { +void reduceInfixQueue(struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) { assert(q->end > q->cur); assert(maxTops); u32 qlen = q->end - q->cur; /* includes MQE_START */ DEBUG_PRINTF("q=%p, len=%u, maxTops=%u maxAge=%u\n", q, qlen, maxTops, maxAge); + char any_work = 0; if (maxAge && q->items[q->cur].location + maxAge < curr_loc) { any_work = 1; diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index a8734cb7e..075f0fc61 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -442,7 +442,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, goto nfa_dead; } - reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); } if (!rosePrefixCheckMiracles(t, left, ci, q, end)) { @@ -540,10 +540,10 @@ void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch, pushQueueAt(q, 0, MQE_START, loc); nfaQueueInitState(q->nfa, q); } else if (isQueueFull(q)) { - reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); if (isQueueFull(q)) { - /* still full - reduceQueue did nothing */ + /* still full - reduceInfixQueue did nothing */ DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, q->end - q->cur); pushQueueNoMerge(q, MQE_END, loc); diff --git a/src/rose/stream.c b/src/rose/stream.c index bd3021454..161e00596 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -291,7 +291,7 @@ char roseCatchUpLeftfix(const struct RoseEngine *t, char *state, DEBUG_PRINTF("infix died of old age\n"); return 0; } - reduceQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth); + reduceInfixQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth); } DEBUG_PRINTF("end scan at %lld\n", last_loc); From 4d5710a84a5d1d44ab909547453cf8544d9c0812 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 14:40:04 +1100 Subject: [PATCH 079/218] Rename rosePrefixCheckMiracles to roseLeftfix... 
--- src/rose/program_runtime.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 075f0fc61..bdada9e14 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -142,9 +142,9 @@ void rosePushDelayedMatch(const struct RoseEngine *t, } static rose_inline -char rosePrefixCheckMiracles(const struct RoseEngine *t, - const struct LeftNfaInfo *left, - struct core_info *ci, struct mq *q, u64a end) { +char roseLeftfixCheckMiracles(const struct RoseEngine *t, + const struct LeftNfaInfo *left, + struct core_info *ci, struct mq *q, u64a end) { if (left->transient) { // Miracles won't help us with transient leftfix engines; they only // scan for a limited time anyway. @@ -445,7 +445,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); } - if (!rosePrefixCheckMiracles(t, left, ci, q, end)) { + if (!roseLeftfixCheckMiracles(t, left, ci, q, end)) { DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); goto nfa_dead; } From 7a775e5fab96b365d4faa0a8fbf409a2bf0bf758 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 14:54:10 +1100 Subject: [PATCH 080/218] reduceInfixQueue: factor out any_work check --- src/rose/infix.h | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/rose/infix.h b/src/rose/infix.h index 0cc63fdf0..e3abc7fda 100644 --- a/src/rose/infix.h +++ b/src/rose/infix.h @@ -44,6 +44,26 @@ int infixTooOld(struct mq *q, s64a curr_loc) { return q_last_loc(q) + maxAge < curr_loc; } +static really_inline +int canReduceQueue(const struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) { + u32 qlen = q->end - q->cur; /* includes MQE_START */ + + if (maxAge && q->items[q->cur].location + maxAge < curr_loc) { + return 1; + } + + if (qlen - 1 > maxTops) { + return 1; + } + + if (qlen - 1 == maxTops + && q->items[q->cur].location != q->items[q->cur + 1].location) { + /* we can advance start to the first top location */ + return 1; + } + + return 0; +} /** * Removes tops which are known not to affect the final state from the queue. 
@@ -70,22 +90,7 @@ void reduceInfixQueue(struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) { DEBUG_PRINTF("q=%p, len=%u, maxTops=%u maxAge=%u\n", q, qlen, maxTops, maxAge); - char any_work = 0; - if (maxAge && q->items[q->cur].location + maxAge < curr_loc) { - any_work = 1; - } - - if (qlen - 1 > maxTops) { - any_work = 1; - } - - if (qlen - 1 == maxTops - && q->items[q->cur].location != q->items[q->cur + 1].location) { - /* we can advance start to the first top location */ - any_work = 1; - } - - if (!any_work) { + if (!canReduceQueue(q, curr_loc, maxTops, maxAge)) { DEBUG_PRINTF("nothing to do\n"); return; } From 5a1dd54049021440b4544ee42aa33423804f60b2 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 15:21:18 +1100 Subject: [PATCH 081/218] Split CHECK_LEFTFIX into CHECK_{INFIX,PREFIX} --- src/rose/program_runtime.h | 57 +++++++++++++++++++++++--------- src/rose/rose_build_bytecode.cpp | 37 +++++++++++++++------ src/rose/rose_dump.cpp | 10 +++++- src/rose/rose_program.h | 13 ++++++-- 4 files changed, 89 insertions(+), 28 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index bdada9e14..590d9ec31 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -144,8 +144,9 @@ void rosePushDelayedMatch(const struct RoseEngine *t, static rose_inline char roseLeftfixCheckMiracles(const struct RoseEngine *t, const struct LeftNfaInfo *left, - struct core_info *ci, struct mq *q, u64a end) { - if (left->transient) { + struct core_info *ci, struct mq *q, u64a end, + const char is_infix) { + if (!is_infix && left->transient) { // Miracles won't help us with transient leftfix engines; they only // scan for a limited time anyway. return 1; @@ -178,7 +179,7 @@ char roseLeftfixCheckMiracles(const struct RoseEngine *t, // If we're a prefix, then a miracle effectively results in us needing to // re-init our state and start fresh. - if (!left->infix) { + if (!is_infix) { if (miracle_loc != begin_loc) { DEBUG_PRINTF("re-init prefix state\n"); q->cur = q->end = 0; @@ -355,9 +356,10 @@ hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } -static rose_inline +static really_inline char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, - u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end, + const char is_infix) { struct core_info *ci = &scratch->core_info; u32 ri = queueToLeftIndex(t, qi); @@ -365,10 +367,12 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", (left->transient ? "transient" : "active"), - (left->infix ? "infix" : "prefix"), + (is_infix ? "infix" : "prefix"), ri, qi, leftfixLag, left->maxLag); assert(leftfixLag <= left->maxLag); + assert(left->infix == is_infix); + assert(!is_infix || !left->transient); // Only prefixes can be transient. struct mq *q = scratch->queues + qi; char *state = scratch->core_info.state; @@ -398,7 +402,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, initRoseQueue(t, qi, left, scratch); if (ci->buf_offset) { // there have been writes before us! 
s32 sp; - if (left->transient) { + if (!is_infix && left->transient) { sp = -(s32)ci->hlen; } else { sp = -(s32)loadRoseDelay(t, state, left); @@ -408,7 +412,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, * at stream boundary */ pushQueueAt(q, 0, MQE_START, sp); - if (left->infix || (ci->buf_offset + sp > 0 && !left->transient)) { + if (is_infix || (ci->buf_offset + sp > 0 && !left->transient)) { loadStreamState(q->nfa, q, sp); } else { pushQueueAt(q, 1, MQE_TOP, sp); @@ -425,7 +429,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, assert(loc >= q_cur_loc(q)); assert(leftfixReport != MO_INVALID_IDX); - if (left->transient) { + if (!is_infix && left->transient) { s64a start_loc = loc - left->transient; if (q_cur_loc(q) < start_loc) { q->cur = q->end = 0; @@ -436,7 +440,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, } if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { - if (left->infix) { + if (is_infix) { if (infixTooOld(q, loc)) { DEBUG_PRINTF("infix %u died of old age\n", ri); goto nfa_dead; @@ -445,7 +449,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); } - if (!roseLeftfixCheckMiracles(t, left, ci, q, end)) { + if (!roseLeftfixCheckMiracles(t, left, ci, q, end, is_infix)) { DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); goto nfa_dead; } @@ -482,6 +486,18 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, return 0; } +static rose_inline +char roseTestPrefix(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { + return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 0); +} + +static rose_inline +char roseTestInfix(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) { + return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 1); +} + static rose_inline void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch, u64a start, u64a end, u32 qi, u32 topEvent, u8 cancel) { @@ -921,10 +937,21 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_LEFTFIX) { - if (!roseTestLeftfix(t, scratch, ri->queue, ri->lag, ri->report, - end)) { - DEBUG_PRINTF("failed leftfix check\n"); + PROGRAM_CASE(CHECK_INFIX) { + if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report, + end)) { + DEBUG_PRINTF("failed infix check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_PREFIX) { + if (!roseTestPrefix(t, scratch, ri->queue, ri->lag, ri->report, + end)) { + DEBUG_PRINTF("failed prefix check\n"); assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 5354955a5..4518b4340 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -194,7 +194,8 @@ class RoseInstruction { case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; - case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; + case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix; + case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix; case 
ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; case ROSE_INSTR_CATCH_UP: return &u.catchUp; @@ -236,7 +237,8 @@ class RoseInstruction { case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); - case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); + case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix); + case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix); case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp); @@ -277,7 +279,8 @@ class RoseInstruction { ROSE_STRUCT_CHECK_BOUNDS checkBounds; ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; - ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix; + ROSE_STRUCT_CHECK_INFIX checkInfix; + ROSE_STRUCT_CHECK_PREFIX checkPrefix; ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; ROSE_STRUCT_PUSH_DELAYED pushDelayed; ROSE_STRUCT_CATCH_UP catchUp; @@ -2724,8 +2727,11 @@ flattenProgram(const vector> &programs) { case ROSE_INSTR_CHECK_LOOKAROUND: ri.u.checkLookaround.fail_jump = jump_val; break; - case ROSE_INSTR_CHECK_LEFTFIX: - ri.u.checkLeftfix.fail_jump = jump_val; + case ROSE_INSTR_CHECK_INFIX: + ri.u.checkInfix.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_PREFIX: + ri.u.checkPrefix.fail_jump = jump_val; break; case ROSE_INSTR_DEDUPE: ri.u.dedupe.fail_jump = jump_val; @@ -2986,11 +2992,22 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!build.cc.streaming || build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX, JumpTarget::NEXT_BLOCK); - ri.u.checkLeftfix.queue = lni.queue; - ri.u.checkLeftfix.lag = build.g[v].left.lag; - ri.u.checkLeftfix.report = build.g[v].left.leftfix_report; - program.push_back(ri); + bool is_prefix = build.isRootSuccessor(v); + if (is_prefix) { + auto ri = + RoseInstruction(ROSE_INSTR_CHECK_PREFIX, JumpTarget::NEXT_BLOCK); + ri.u.checkPrefix.queue = lni.queue; + ri.u.checkPrefix.lag = build.g[v].left.lag; + ri.u.checkPrefix.report = build.g[v].left.leftfix_report; + program.push_back(move(ri)); + } else { + auto ri = + RoseInstruction(ROSE_INSTR_CHECK_INFIX, JumpTarget::NEXT_BLOCK); + ri.u.checkInfix.queue = lni.queue; + ri.u.checkInfix.lag = build.g[v].left.lag; + ri.u.checkInfix.report = build.g[v].left.leftfix_report; + program.push_back(move(ri)); + } } static diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 25c6e77da..a188c78b7 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -288,7 +288,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_LEFTFIX) { + PROGRAM_CASE(CHECK_INFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + os << " report " << ri->report << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_PREFIX) { os << " queue " << ri->queue << endl; os << " lag " << ri->lag << endl; os << " report " << ri->report << endl; diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 81852f097..36a9aabda 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -49,7 +49,8 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance 
from offset 0. ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. - ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. + ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state. + ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state. ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. @@ -141,7 +142,15 @@ struct ROSE_STRUCT_CHECK_LOOKAROUND { u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_STRUCT_CHECK_LEFTFIX { +struct ROSE_STRUCT_CHECK_INFIX { + u8 code; //!< From enum RoseInstructionCode. + u32 queue; //!< Queue of leftfix to check. + u32 lag; //!< Lag of leftfix for this case. + ReportID report; //!< ReportID of leftfix to check. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_PREFIX { u8 code; //!< From enum RoseInstructionCode. u32 queue; //!< Queue of leftfix to check. u32 lag; //!< Lag of leftfix for this case. From 030cc2a487a3744dd670fd00e76cc4bd3f90029a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 Feb 2016 15:57:56 +1100 Subject: [PATCH 082/218] Update Rose state layout description Many of the things in this list no longer existed. --- src/rose/runtime.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 414ad78fd..e7e953774 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -41,16 +41,18 @@ /* * ROSE STATE LAYOUT: - * state multibit - * runtime state structure - * full history table - * last history table - * short history table - * short queues (two multibits) - * last queues (two multibits) - * active array - * delay rb dirty - * nfa state + * + * - runtime status byte (halt status, delay rebuild dirty, etc) + * - rose state multibit + * - active leaf array (multibit) + * - active leftfix array (multibit) + * - leftfix lag table + * - anchored matcher state + * - literal groups + * - history buffer + * - exhausted bitvector + * - som slots, som multibit arrays + * - nfa stream state (for each nfa) */ #define rose_inline really_inline From 129578f970be97367de804c3811c90c1ab4a4abc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 15 Feb 2016 13:41:21 +1100 Subject: [PATCH 083/218] Rose program: Improvements to debug/assertions - Add current pc to debug printf. - Assert that pc doesn't escape the RoseEngine structure. 
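Illustrative aside (not part of the patch): the pattern being applied is a bytecode dispatch loop that remembers the program's base pointer and asserts on every iteration that the program counter stays inside the engine blob. A minimal sketch, with all names invented:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

enum Instr : uint8_t { INSTR_NOP, INSTR_JUMP, INSTR_END };

static void runProgram(const uint8_t *blob, size_t blob_size,
                       uint32_t programOffset) {
    const uint8_t *pc_base = blob + programOffset;
    const uint8_t *pc = pc_base;
    for (;;) {
        assert(pc >= pc_base);
        assert((size_t)(pc - blob) < blob_size); // pc must not escape
        std::printf("pc=%u\n",
                    (unsigned)(programOffset + (uint32_t)(pc - pc_base)));
        switch (*pc) {
        case INSTR_NOP:
            pc += 1;
            break;
        case INSTR_JUMP:
            pc += pc[1]; // relative jump, like fail_jump/done_jump
            break;
        case INSTR_END:
            return;
        }
    }
}

int main() {
    const uint8_t prog[] = {INSTR_NOP, INSTR_JUMP, 2, INSTR_END};
    runProgram(prog, sizeof(prog), 0);
    return 0;
}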
--- src/rose/program_runtime.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 590d9ec31..3fbaeb32c 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -808,10 +808,10 @@ char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { return end >= min_bound && end <= max_bound; } - #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ - DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \ + DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ + programOffset + (u32)(pc - pc_base)); \ const struct ROSE_STRUCT_##name *ri = \ (const struct ROSE_STRUCT_##name *)pc; @@ -848,7 +848,9 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, for (;;) { assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); - u8 code = *(const u8 *)pc; + assert(pc >= pc_base); + assert((pc - pc_base) < t->size); + const u8 code = *(const u8 *)pc; assert(code <= ROSE_INSTR_END); switch ((enum RoseInstructionCode)code) { From c055f81398b7e1e51fb00f615994d8880ddeab4c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 15 Feb 2016 13:50:05 +1100 Subject: [PATCH 084/218] Dump: give abs jump targets in output --- src/rose/rose_dump.cpp | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index a188c78b7..4eaab3264 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -235,11 +235,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { for (;;) { u8 code = *(const u8 *)pc; assert(code <= ROSE_INSTR_END); + const size_t offset = pc - pc_base; switch (code) { PROGRAM_CASE(ANCHORED_DELAY) { os << " groups 0x" << std::hex << ri->groups << std::dec << endl; - os << " done_jump +" << ri->done_jump << endl; + os << " done_jump " << offset + ri->done_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -263,27 +264,27 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_ONLY_EOD) { - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_BOUNDS) { os << " min_bound " << ri->min_bound << endl; os << " max_bound " << ri->max_bound << endl; - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_NOT_HANDLED) { os << " key " << ri->key << endl; - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LOOKAROUND) { os << " index " << ri->index << endl; os << " count " << ri->count << endl; - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; dumpLookaround(os, t, ri); } PROGRAM_NEXT_INSTRUCTION @@ -292,7 +293,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { os << " queue " << ri->queue << endl; os << " lag " << ri->lag << endl; os << " report " << ri->report << endl; - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -300,7 +301,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { os << " queue " << ri->queue << endl; os << " lag " << ri->lag << endl; os << " report " << ri->report << endl; - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << 
offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -349,14 +350,14 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(DEDUPE) { os << " report " << ri->report << endl; dumpReport(os, t, ri->report); - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE_SOM) { os << " report " << ri->report << endl; dumpReport(os, t, ri->report); - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -404,14 +405,14 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(CHECK_EXHAUSTED) { os << " ekey " << ri->ekey << endl; - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_MIN_LENGTH) { os << " end_adj " << ri->end_adj << endl; os << " min_length " << ri->min_length << endl; - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -434,7 +435,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(CHECK_STATE) { os << " index " << ri->index << endl; - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -442,7 +443,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { os << " iter_offset " << ri->iter_offset << endl; os << " jump_table " << ri->jump_table << endl; dumpJumpTable(os, t, ri); - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -450,7 +451,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { os << " iter_offset " << ri->iter_offset << endl; os << " jump_table " << ri->jump_table << endl; os << " state " << ri->state << endl; - os << " fail_jump +" << ri->fail_jump << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION From 6e8f394d8d6e18845cf488d1649e3fcff690b038 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 15 Feb 2016 16:17:58 +1100 Subject: [PATCH 085/218] Make comparison signed (fix warning) --- src/rose/program_runtime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 3fbaeb32c..3d25d6824 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -849,7 +849,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, for (;;) { assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); assert(pc >= pc_base); - assert((pc - pc_base) < t->size); + assert((size_t)(pc - pc_base) < t->size); const u8 code = *(const u8 *)pc; assert(code <= ROSE_INSTR_END); From d77ee0839dbc06b25c797a2e4b7f172de3d10ca7 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 15 Feb 2016 15:16:15 +1100 Subject: [PATCH 086/218] Improved test for AVX2 support Test for xsave and XMM/YMM registers. Use ICC dynamic feature detection where available. 
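Illustrative aside (not part of the patch): the same detection sequence can be sketched standalone, assuming a GCC/Clang toolchain (whose <cpuid.h> provides __get_cpuid and __get_cpuid_count). The bit positions are architectural: CPUID.1:ECX has OSXSAVE (bit 27) and AVX (bit 28), XCR0 bits 1 and 2 cover XMM and YMM state, and CPUID.(7,0):EBX bit 5 is AVX2.

#include <cpuid.h>
#include <cstdint>
#include <cstdio>

static uint64_t xgetbv0() {
    uint32_t a, d;
    __asm__ volatile("xgetbv" : "=a"(a), "=d"(d) : "c"(0));
    return ((uint64_t)d << 32) | a;
}

static bool haveAvx2() {
    unsigned eax, ebx, ecx, edx;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    const unsigned avx_osxsave = (1u << 28) | (1u << 27);
    if ((ecx & avx_osxsave) != avx_osxsave) {
        return false; // CPU lacks AVX or OS hasn't enabled XGETBV
    }
    if ((xgetbv0() & 0x6) != 0x6) {
        return false; // OS does not save XMM/YMM state
    }
    if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return (ebx & (1u << 5)) != 0; // AVX2
}

int main() {
    std::printf("AVX2 %s\n", haveAvx2() ? "available" : "unavailable");
    return 0;
}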
--- src/util/cpuid_flags.c | 58 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index 58cfeb326..9a8bd922e 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,8 @@ #define SSSE3 (1 << 9) #define SSE4_1 (1 << 19) #define SSE4_2 (1 << 20) +#define XSAVE (1 << 27) +#define AVX (1 << 28) // EDX #define SSE (1 << 25) @@ -51,6 +53,10 @@ #define AVX2 (1 << 5) #define BMI2 (1 << 8) +// Extended Control Register 0 (XCR0) values +#define XCR0_SSE (1 << 1) +#define XCR0_AVX (1 << 2) + static __inline void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { @@ -66,19 +72,61 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax, #endif } -u64a cpuid_flags(void) { +static inline +u64a xgetbv(u32 op) { +#if defined(_WIN32) || defined(__INTEL_COMPILER) + return _xgetbv(op); +#else + u32 a, d; + __asm__ volatile ( + "xgetbv\n" + : "=a"(a), + "=d"(d) + : "c"(op)); + return ((u64a)d << 32) + a; +#endif +} + +static +int check_avx2(void) { +#if defined(__INTEL_COMPILER) + return _may_i_use_cpu_feature(_FEATURE_AVX2); +#else unsigned int eax, ebx, ecx, edx; - u64a cap = 0; - // version info cpuid(1, 0, &eax, &ebx, &ecx, &edx); - /* ECX and EDX contain capability flags */ + /* check AVX is supported and XGETBV is enabled by OS */ + if ((ecx & (AVX | XSAVE)) != (AVX | XSAVE)) { + DEBUG_PRINTF("AVX and XSAVE not supported\n"); + return 0; + } + /* check that SSE and AVX registers are enabled by OS */ + u64a xcr0 = xgetbv(0); + if ((xcr0 & (XCR0_SSE | XCR0_AVX)) != (XCR0_SSE | XCR0_AVX)) { + DEBUG_PRINTF("SSE and AVX registers not enabled\n"); + return 0; + } + + /* ECX and EDX contain capability flags */ ecx = 0; cpuid(7, 0, &eax, &ebx, &ecx, &edx); if (ebx & AVX2) { + DEBUG_PRINTF("AVX2 enabled\n"); + return 1; + } + + return 0; +#endif +} + +u64a cpuid_flags(void) { + u64a cap = 0; + + if (check_avx2()) { + DEBUG_PRINTF("AVX2 enabled\n"); cap |= HS_CPU_FEATURES_AVX2; } From 6294b8a89b067519261696b469c0f7c3d4dfc759 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 16 Feb 2016 09:25:18 +1100 Subject: [PATCH 087/218] Rose: assign DRs in allocateFinalLiteralId Previously, direct reports were allocated earlier; now all final IDs are assigned in the same place. 
--- src/rose/rose_build_compile.cpp | 99 +++++++++++++++------------------ 1 file changed, 46 insertions(+), 53 deletions(-) diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 6202299ba..c71743bfe 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -247,6 +247,44 @@ bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { return false; } +static +void makeDirectReport(RoseBuildImpl &build, u32 i) { + if (build.literals.right.at(i).table == ROSE_FLOATING) { + build.floating_direct_report = true; + } + + rose_literal_info &info = build.literal_info[i]; + assert(!info.vertices.empty()); + + vector reports; + for (const auto &v : info.vertices) { + const auto &r = build.g[v].reports; + reports.insert(end(reports), begin(r), end(r)); + } + sort(begin(reports), end(reports)); + reports.erase(unique(begin(reports), end(reports)), end(reports)); + + if (reports.size() == 1) { + // A single direct report. We set the high bit to indicate it's a + // direct report and encode the ReportID itself in the final_id + // field. + ReportID report = reports.front(); + assert(!(report & LITERAL_DR_FLAG)); + info.final_id = LITERAL_DR_FLAG | report; + DEBUG_PRINTF("direct report %u -> %u\n", info.final_id, report); + } else { + // A multi-direct report. Here we write the report set into a list + // to be triggered when we see this literal. + u32 mdr_index = verify_u32(build.mdr_reports.size()); + info.final_id = LITERAL_MDR_FLAG | mdr_index; + DEBUG_PRINTF("multi direct report %u -> [%s]\n", info.final_id, + as_string_list(reports).c_str()); + build.mdr_reports.insert(end(build.mdr_reports), begin(reports), + end(reports)); + build.mdr_reports.push_back(MO_INVALID_IDX); + } +} + static void allocateFinalLiteralId(RoseBuildImpl &tbi) { /* allocate final literal ids - these are the literal ids used in the @@ -264,15 +302,18 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { assert(tbi.final_id_to_literal.empty()); u32 next_final_id = 0; for (u32 i = 0; i < tbi.literal_info.size(); i++) { - if (tbi.hasFinalId(i)) { - continue; - } + assert(!tbi.hasFinalId(i)); if (!isUsedLiteral(tbi, i)) { /* what is this literal good for? absolutely nothing */ continue; } + if (tbi.isDirectReport(i)) { + makeDirectReport(tbi, i); + continue; + } + // The special EOD event literal has its own program and does not need // a real literal ID. if (i == tbi.eod_event_literal_id) { @@ -305,51 +346,6 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { &next_final_id); } -static -void findDirectReports(RoseBuildImpl &tbi) { - const RoseGraph &g = tbi.g; - - for (u32 i = 0; i < tbi.literal_info.size(); i++) { - if (!tbi.isDirectReport(i)) { - continue; - } - - if (tbi.literals.right.at(i).table == ROSE_FLOATING) { - tbi.floating_direct_report = true; - } - - rose_literal_info &info = tbi.literal_info[i]; - const auto &verts = info.vertices; - - assert(!verts.empty()); - if (verts.size() == 1 && g[*verts.begin()].reports.size() == 1) { - // A single direct report. We set the high bit to indicate it's a - // direct report and encode the ReportID itself in the final_id - // field. - ReportID report = *(g[*verts.begin()].reports.begin()); - assert(!(report & LITERAL_DR_FLAG)); - info.final_id = LITERAL_DR_FLAG | report; - } else { - // A multi-direct report. Here we write the report set into a list - // to be triggered when we see this literal. 
- u32 mdr_index = verify_u32(tbi.mdr_reports.size()); - info.final_id = LITERAL_MDR_FLAG | mdr_index; - - // Temporary set for deduplication and determinism. - flat_set reports; - - for (auto v : verts) { - insert(&reports, g[v].reports); - } - tbi.mdr_reports.insert(tbi.mdr_reports.end(), reports.begin(), - reports.end()); - tbi.mdr_reports.push_back(MO_INVALID_IDX); - } - - DEBUG_PRINTF("allocating final id %u to %u\n", info.final_id, i); - } -} - #define MAX_EXPLOSION_NC 3 static bool limited_explosion(const ue2_literal &s) { @@ -2172,9 +2168,8 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { // If we've got a very small number of EOD-anchored literals, consider // moving them into the floating table so that we only have one literal - // matcher to run. Note that this should happen before findDirectReports as - // it modifies the literal/vertex set. Note also that this needs to happen - // before addAnchoredSmallBlockLiterals as it may create anchored literals. + // matcher to run. Note that this needs to happen before + // addAnchoredSmallBlockLiterals as it may create anchored literals. assert(roleOffsetsAreValid(g)); stealEodVertices(*this); @@ -2190,8 +2185,6 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { assert(roleOffsetsAreValid(g)); handleMixedSensitivity(); - findDirectReports(*this); - assignHistories(*this); convertAnchPrefixToBounds(*this); From 31dea6c2759973d61ea7f354e5749d872e113250 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 16 Feb 2016 09:29:07 +1100 Subject: [PATCH 088/218] Rose: allow direct reports to be aliased --- src/rose/rose_build_role_aliasing.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 57bbc5504..1f8734031 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -347,13 +347,7 @@ bool isAliasingCandidate(RoseVertex v, const RoseBuildImpl &tbi) { return false; } - u32 id = *props.literals.begin(); - assert(id != MO_INVALID_IDX); - - // Ignore direct reports. 
- if (tbi.isDirectReport(id)) { - return false; - } + assert(*props.literals.begin() != MO_INVALID_IDX); // Any vertex involved in a "ghost" relationship has already been disallowed From cc5db616868c03d88e48a4637b42dbf6f3c26a7e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 16 Feb 2016 17:14:13 +1100 Subject: [PATCH 089/218] Rose: allow DR literals to share vertices --- src/rose/rose_build_bytecode.cpp | 12 +++++------- src/rose/rose_build_compile.cpp | 18 ++++++++++-------- src/rose/rose_build_impl.h | 12 +++++++++++- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 4518b4340..a6ab726df 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -501,7 +501,7 @@ bool isPureFloating(const RoseBuildImpl &tbi) { continue; } - if (!tbi.hasDirectFinalId(v) || !tbi.isFloating(v)) { + if (!tbi.allDirectFinalIds(v) || !tbi.isFloating(v)) { DEBUG_PRINTF("vertex %zu isn't floating and direct\n", g[v].idx); return false; } @@ -3979,16 +3979,14 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { const auto &g = build.g; for (const auto &e : edges_range(g)) { const auto &v = target(e, g); - if (build.hasDirectFinalId(v)) { - // Skip direct reports, which do not have RoseLiteral entries. - continue; - } for (const auto &lit_id : g[v].literals) { assert(lit_id < build.literal_info.size()); u32 final_id = build.literal_info.at(lit_id).final_id; - if (final_id != MO_INVALID_IDX) { - unique_lit_edge_map[final_id].insert(e); + if (final_id == MO_INVALID_IDX || final_id & LITERAL_MDR_FLAG) { + // Unused, special or direct report IDs are handled elsewhere. + continue; } + unique_lit_edge_map[final_id].insert(e); } } diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index c71743bfe..70bf0cf3a 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -588,11 +588,7 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { // role's reports from a list. for (auto v : info.vertices) { - if (g[v].literals.size() != 1) { - return false; // Avoid roles with multiple literals at this stage. - } - - assert(*g[v].literals.begin() == id); + assert(contains(g[v].literals, id)); if (g[v].reports.empty() || g[v].eod_accept || // no accept EOD @@ -903,11 +899,17 @@ bool RoseBuildImpl::hasDirectFinalId(u32 id) const { return literal_info.at(id).final_id & LITERAL_MDR_FLAG; } -bool RoseBuildImpl::hasDirectFinalId(RoseVertex v) const { - if (g[v].literals.empty()) { +bool RoseBuildImpl::allDirectFinalIds(RoseVertex v) const { + const auto &lits = g[v].literals; + if (lits.empty()) { return false; } - return hasDirectFinalId(*g[v].literals.begin()); + for (const auto &lit : lits) { + if (!hasDirectFinalId(lit)) { + return false; + } + } + return true; } bool RoseBuildImpl::hasFinalId(u32 id) const { diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index c6d10063f..1a5e77aa3 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -440,8 +440,18 @@ class RoseBuildImpl : public RoseBuild { bool isDirectReport(u32 id) const; bool isDelayed(u32 id) const; + + /** + * \brief True if the given literal ID is a direct or multi-direct report. + */ bool hasDirectFinalId(u32 id) const; - bool hasDirectFinalId(RoseVertex v) const; + + /** + * \brief True if all the literals associated with the given vertex are + * direct or multi-direct reports. 
+ */ + bool allDirectFinalIds(RoseVertex v) const; + bool hasFinalId(u32 id) const; bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored From b62711a18f176ee89f00557c742145fc0ede5395 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 17 Feb 2016 10:36:22 +1100 Subject: [PATCH 090/218] Rose: only use direct reports for external reports --- src/rose/rose_build_compile.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 70bf0cf3a..db1e49eeb 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -598,6 +598,13 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { return false; } + // Use the program to handle cases that aren't external reports. + for (const ReportID &id : g[v].reports) { + if (!isExternalReport(rm.getReport(id))) { + return false; + } + } + if (literals.right.at(id).table == ROSE_ANCHORED) { /* in-edges are irrelevant for anchored region. */ continue; From 0d9422eed76f91f90092717dca94dd96fd00be80 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 17 Feb 2016 10:42:41 +1100 Subject: [PATCH 091/218] Rose: make DR runtime path external report only --- src/rose/match.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/src/rose/match.c b/src/rose/match.c index faa583032..d626950bc 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -274,25 +274,17 @@ hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, ReportID id, u64a end, /* handles catchup, som, cb, etc */ static really_inline -hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, - struct hs_scratch *scratch, ReportID id, - u64a offset, char in_anchored) { +hwlmcb_rv_t roseHandleDirectReport(const struct RoseEngine *t, + struct hs_scratch *scratch, ReportID id, + u64a offset, char in_anchored) { + // The direct report path is only used for external reports. + assert(isExternalReport(getInternalReport(t, id))); + if (roseCatchUpTo(t, scratch, offset, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - const struct internal_report *ri = getInternalReport(t, id); - if (ri) { - if (isInternalSomReport(ri)) { - roseHandleSom(t, scratch, id, offset); - return HWLM_CONTINUE_MATCHING; - } else if (ri->type == INTERNAL_ROSE_CHAIN) { - return roseCatchUpAndHandleChainMatch(t, scratch, id, offset, - in_anchored); - } - } - return roseHandleMatch(t, id, offset, scratch); } @@ -308,7 +300,8 @@ hwlmcb_rv_t roseHandleAnchoredDirectReport(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } - return roseHandleReport(t, scratch, report, real_end, 1 /* in anchored */); + return roseHandleDirectReport(t, scratch, report, real_end, + 1 /* in anchored */); } int roseAnchoredCallback(u64a end, u32 id, void *ctx) { @@ -409,8 +402,8 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, mdr_offset; for (; *report != MO_INVALID_IDX; report++) { DEBUG_PRINTF("handle multi-direct report %u\n", *report); - hwlmcb_rv_t rv = roseHandleReport(t, scratch, *report, end, - 0 /* in anchored */); + hwlmcb_rv_t rv = roseHandleDirectReport(t, scratch, *report, + end, 0 /* in anchored */); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -420,8 +413,8 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, // Single direct report. 
ReportID report = literalToReport(id); DEBUG_PRINTF("handle direct report %u\n", report); - return roseHandleReport(t, scratch, report, end, - 0 /* in anchored */); + return roseHandleDirectReport(t, scratch, report, end, + 0 /* in anchored */); } } From f519fd9bcdc0ad2257e0cf4ec50b208caac64e75 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 18 Feb 2016 16:37:15 +1100 Subject: [PATCH 092/218] Rose: don't assume roles with >1 lit need dedupe We only require dedupe for such roles when they have literals that can arrive simultaneously (i.e. one literal overlaps with the suffix of another). --- src/rose/rose_build_misc.cpp | 49 +++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 66b0bdd44..942909685 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -641,6 +641,38 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) } } +static +vector makePath(const rose_literal_id &lit) { + vector path(begin(lit.s), end(lit.s)); + for (u32 i = 0; i < lit.delay; i++) { + path.push_back(CharReach::dot()); + } + return path; +} + +/** + * \brief True if one of the given literals overlaps with the suffix of + * another, meaning that they could arrive at the same offset. + */ +static +bool literalsCouldRace(const rose_literal_id &lit1, + const rose_literal_id &lit2) { + DEBUG_PRINTF("compare %s (delay %u) and %s (delay %u)\n", + dumpString(lit1.s).c_str(), lit1.delay, + dumpString(lit2.s).c_str(), lit2.delay); + + // Add dots on the end of each literal for delay. + const auto v1 = makePath(lit1); + const auto v2 = makePath(lit2); + + // See if the smaller path is a suffix of the larger path. + const auto *smaller = v1.size() < v2.size() ? &v1 : &v2; + const auto *bigger = v1.size() < v2.size() ? &v2 : &v1; + auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(), + overlaps); + return r.first == smaller->rend(); +} + bool RoseDedupeAuxImpl::requiresDedupeSupport( const ue2::flat_set &reports) const { /* TODO: this could be expanded to check for offset or character @@ -689,11 +721,20 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport( has_role = true; - /* TODO: extend handled roles so that we don't have to worry about - * multiple literals */ if (g[v].literals.size() > 1) { - return true; /* fear that role may be triggered multiple times - * at same offset. 
*/ + const auto &lits = g[v].literals; + DEBUG_PRINTF("vertex %zu lits: %s\n", g[v].idx, + as_string_list(lits).c_str()); + for (auto it = begin(lits); it != end(lits); ++it) { + const auto &lit1 = tbi.literals.right.at(*it); + for (auto jt = next(it); jt != end(lits); ++jt) { + const auto &lit2 = tbi.literals.right.at(*jt); + if (literalsCouldRace(lit1, lit2)) { + DEBUG_PRINTF("literals could race\n"); + return true; + } + } + } } if (g[v].eod_accept) { From 6bcccb4c5d1fe69fa173547c7a4d0e36a1312452 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 22 Feb 2016 10:58:23 +1100 Subject: [PATCH 093/218] Rose: further generalise literal dedupe work --- src/rose/rose_build_misc.cpp | 50 ++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 942909685..1df1b2d99 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -680,7 +680,6 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport( const RoseGraph &g = tbi.g; - bool has_role = false; bool has_suffix = false; bool has_outfix = false; @@ -713,33 +712,40 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport( } /* roles */ + + map lits; // Literal ID -> count of occurrences. + + const bool has_role = !roles.empty(); for (auto v : roles) { - if (has_role) { - return true; /* fear that multiple roles may trigger at same - offset */ + for (const auto &lit : g[v].literals) { + lits[lit]++; } - - has_role = true; - - if (g[v].literals.size() > 1) { - const auto &lits = g[v].literals; - DEBUG_PRINTF("vertex %zu lits: %s\n", g[v].idx, - as_string_list(lits).c_str()); - for (auto it = begin(lits); it != end(lits); ++it) { - const auto &lit1 = tbi.literals.right.at(*it); - for (auto jt = next(it); jt != end(lits); ++jt) { - const auto &lit2 = tbi.literals.right.at(*jt); - if (literalsCouldRace(lit1, lit2)) { - DEBUG_PRINTF("literals could race\n"); - return true; - } + if (g[v].eod_accept) { + // Literals plugged into this EOD accept must be taken into account + // as well. + for (auto u : inv_adjacent_vertices_range(v, g)) { + for (const auto &lit : g[u].literals) { + lits[lit]++; } } } + } - if (g[v].eod_accept) { - if (in_degree(v, g) > 1) { - /* may actually map to a number of terminal vertices */ + /* literals */ + + for (const auto &m : lits) { + if (m.second > 1) { + DEBUG_PRINTF("lit %u used by >1 reporting roles\n", m.first); + return true; + } + } + + for (auto it = begin(lits); it != end(lits); ++it) { + const auto &lit1 = tbi.literals.right.at(it->first); + for (auto jt = next(it); jt != end(lits); ++jt) { + const auto &lit2 = tbi.literals.right.at(jt->first); + if (literalsCouldRace(lit1, lit2)) { + DEBUG_PRINTF("literals could race\n"); return true; } } From a7daa70942bc0b8d8fb93969a222dd2898a18665 Mon Sep 17 00:00:00 2001 From: Xiang Wang Date: Fri, 11 Dec 2015 13:27:53 -0500 Subject: [PATCH 094/218] Castle: exclusive analysis for multiple subcastle chunks Apply clique analysis to subcastle chunks if the number of subcastles is large and check the status of each chunk separately at runtime. 
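For intuition, here is a minimal standalone sketch of the chunked analysis (illustrative C++ only, not the castlecompile.cpp code; chunkedExclusion, chunkSize and the exclusive predicate are invented for this sketch). Repeats are examined in fixed-size chunks, and within each chunk a mutually exclusive group is gathered by a pairwise test; the patch itself builds a clique graph per chunk, capped at CLIQUE_GRAPH_MAX_SIZE vertices, and keeps the largest clique found rather than using this greedy pass.

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <vector>

    // Hypothetical sketch: greedily gather one mutually exclusive group per
    // fixed-size chunk of repeats. Every pair inside a group satisfies the
    // exclusive() predicate, i.e. the group is a clique.
    static std::vector<std::vector<size_t>>
    chunkedExclusion(size_t numRepeats, size_t chunkSize,
                     const std::function<bool(size_t, size_t)> &exclusive) {
        std::vector<std::vector<size_t>> groups;
        for (size_t lower = 0; lower < numRepeats; lower += chunkSize) {
            const size_t upper = std::min(lower + chunkSize, numRepeats);
            std::vector<size_t> group;
            for (size_t i = lower; i < upper; i++) {
                bool compatible = true;
                for (size_t j : group) {
                    if (!exclusive(i, j)) {
                        compatible = false;
                        break;
                    }
                }
                if (compatible) {
                    group.push_back(i);
                }
            }
            if (group.size() > 1) { // singleton groups save no stream state
                groups.push_back(std::move(group));
            }
        }
        return groups;
    }

    int main() {
        // Toy predicate for demonstration: same-parity repeats are exclusive.
        auto excl = [](size_t a, size_t b) { return (a % 2) == (b % 2); };
        auto groups = chunkedExclusion(10, 4, excl);
        return groups.empty() ? 1 : 0;
    }

Each resulting group then needs only a packed active-subcastle id plus one bit in the exclusive-group multibit at runtime, instead of a multibit entry and full packed state per repeat.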
--- src/nfa/castle.c | 285 +++++++++++++++++++------------------- src/nfa/castle_internal.h | 72 +++++++--- src/nfa/castlecompile.cpp | 215 +++++++++++++++++----------- 3 files changed, 335 insertions(+), 237 deletions(-) diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 274e5705a..5558381d5 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -111,17 +111,22 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) { DEBUG_PRINTF("offset=%llu\n", offset); if (c->exclusive) { - const u32 activeIdx = partial_load_u32(q->streamState, - c->activeIdxSize); - DEBUG_PRINTF("subcastle %u\n", activeIdx); - if (activeIdx < c->numRepeats && subCastleReportCurrent(c, q, - offset, activeIdx) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + u8 *active = (u8 *)q->streamState; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); + DEBUG_PRINTF("subcastle %u\n", activeIdx); + if (subCastleReportCurrent(c, q, + offset, activeIdx) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } } } - if (!c->pureExclusive) { - const u8 *active = (const u8 *)q->streamState + c->activeIdxSize; + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)q->streamState + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { DEBUG_PRINTF("subcastle %u\n", i); @@ -168,9 +173,12 @@ char castleInAccept(const struct Castle *c, struct mq *q, } if (c->exclusive) { - const u32 activeIdx = partial_load_u32(q->streamState, - c->activeIdxSize); - if (activeIdx < c->numRepeats) { + u8 *active = (u8 *)q->streamState; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); if (subCastleInAccept(c, q, report, offset, activeIdx)) { return 1; @@ -178,11 +186,10 @@ char castleInAccept(const struct Castle *c, struct mq *q, } } - if (!c->pureExclusive) { - const u8 *active = (const u8 *)q->streamState + c->activeIdxSize; + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)q->streamState + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; - i = mmbit_iterate(active, c->numRepeats, i)) { + i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { DEBUG_PRINTF("subcastle %u\n", i); if (subCastleInAccept(c, q, report, offset, i)) { return 1; @@ -197,7 +204,6 @@ static really_inline void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset, void *full_state, void *stream_state, const u32 subIdx) { - u8 *active = (u8 *)stream_state; const struct SubCastle *sub = getSubCastle(c, subIdx); const struct RepeatInfo *info = getRepeatInfo(sub); @@ -207,10 +213,13 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset, if (repeatHasMatch(info, rctrl, rstate, offset) == 
REPEAT_STALE) { DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset); - if (sub->exclusive) { - partial_store_u32(stream_state, c->numRepeats, c->activeIdxSize); + if (sub->exclusiveId < c->numRepeats) { + u8 *active = (u8 *)stream_state; + u8 *groups = active + c->groupIterOffset; + mmbit_unset(groups, c->numGroups, sub->exclusiveId); } else { - mmbit_unset(active + c->activeIdxSize, c->numRepeats, subIdx); + u8 *active = (u8 *)stream_state + c->activeOffset; + mmbit_unset(active, c->numRepeats, subIdx); } } } @@ -226,16 +235,20 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset, } if (c->exclusive) { - const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize); - if (activeIdx < c->numRepeats) { + u8 *active = (u8 *)stream_state; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, activeIdx); } } - if (!c->pureExclusive) { - const u8 *active = (const u8 *)stream_state + c->activeIdxSize; + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)stream_state + c->activeOffset; const struct mmbit_sparse_iter *it = (const void *)((const char *)c + c->staleIterOffset); @@ -266,12 +279,20 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, info->packedCtrlSize; char is_alive = 0; - if (sub->exclusive) { - const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize); - is_alive = (activeIdx == top); - partial_store_u32(stream_state, top, c->activeIdxSize); + u8 *active = (u8 *)stream_state; + if (sub->exclusiveId < c->numRepeats) { + u8 *groups = active + c->groupIterOffset; + active += sub->exclusiveId * c->activeIdxSize; + if (mmbit_set(groups, c->numGroups, sub->exclusiveId)) { + const u32 activeIdx = partial_load_u32(active, c->activeIdxSize); + is_alive = (activeIdx == top); + } + + if (!is_alive) { + partial_store_u32(active, top, c->activeIdxSize); + } } else { - u8 *active = (u8 *)stream_state + c->activeIdxSize; + active += c->activeOffset; is_alive = mmbit_set(active, c->numRepeats, top); } @@ -309,11 +330,11 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin, u64a match = repeatNextMatch(info, rctrl, rstate, begin); if (match == 0) { DEBUG_PRINTF("no more matches for sub %u\n", subIdx); - if (sub->exclusive) { - partial_store_u32(stream_state, c->numRepeats, - c->activeIdxSize); + if (sub->exclusiveId < c->numRepeats) { + u8 *groups = (u8 *)stream_state + c->groupIterOffset; + mmbit_unset(groups, c->numGroups, sub->exclusiveId); } else { - u8 *active = (u8 *)stream_state + c->activeIdxSize; + u8 *active = (u8 *)stream_state + c->activeOffset; mmbit_unset(active, c->numRepeats, subIdx); } return; @@ -346,16 +367,20 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end, *mloc = 0; if (c->exclusive) { - const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize); - if (activeIdx < c->numRepeats) { + u8 *active = (u8 *)stream_state; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, 
c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, &found, activeIdx); } } - if (!c->pureExclusive) { - u8 *active = (u8 *)stream_state + c->activeIdxSize; + if (c->exclusive != PURE_EXCLUSIVE) { + u8 *active = (u8 *)stream_state + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { @@ -384,31 +409,38 @@ u64a subCastleNextMatch(const struct Castle *c, void *full_state, return repeatNextMatch(info, rctrl, rstate, loc); } +static really_inline +void set_matching(const struct Castle *c, const u64a match, u8 *active, + u8 *matching, const u32 active_size, const u32 active_id, + const u32 matching_id, u64a *offset, const u64a end) { + if (match == 0) { + DEBUG_PRINTF("no more matches\n"); + mmbit_unset(active, active_size, active_id); + } else if (match > end) { + // If we had a local copy of the active mmbit, we could skip + // looking at this repeat again. But we don't, so we just move + // on. + } else if (match == *offset) { + mmbit_set(matching, c->numRepeats, matching_id); + } else if (match < *offset) { + // New minimum offset. + *offset = match; + mmbit_clear(matching, c->numRepeats); + mmbit_set(matching, c->numRepeats, matching_id); + } +} + static really_inline void subCastleMatchLoop(const struct Castle *c, void *full_state, void *stream_state, const u64a end, const u64a loc, u64a *offset) { - u8 *active = (u8 *)stream_state + c->activeIdxSize; + u8 *active = (u8 *)stream_state + c->activeOffset; u8 *matching = full_state; - mmbit_clear(matching, c->numRepeats); for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i); - if (match == 0) { - DEBUG_PRINTF("no more matches\n"); - mmbit_unset(active, c->numRepeats, i); - } else if (match > end) { - // If we had a local copy of the active mmbit, we could skip - // looking at this repeat again. But we don't, so we just move - // on. - } else if (match == *offset) { - mmbit_set(matching, c->numRepeats, i); - } else if (match < *offset) { - // New minimum offset. - *offset = match; - mmbit_clear(matching, c->numRepeats); - mmbit_set(matching, c->numRepeats, i); - } + set_matching(c, match, active, matching, c->numRepeats, i, + i, offset, end); } } @@ -451,61 +483,37 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end, // full_state (scratch). u64a offset = end; // min offset of next match - char found = 0; u32 activeIdx = 0; + mmbit_clear(matching, c->numRepeats); if (c->exclusive) { - activeIdx = partial_load_u32(stream_state, c->activeIdxSize); - if (activeIdx < c->numRepeats) { - u32 i = activeIdx; - DEBUG_PRINTF("subcastle %u\n", i); + u8 *active = (u8 *)stream_state; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + activeIdx = partial_load_u32(cur, c->activeIdxSize); u64a match = subCastleNextMatch(c, full_state, stream_state, - loc, i); - - if (match == 0) { - DEBUG_PRINTF("no more matches\n"); - partial_store_u32(stream_state, c->numRepeats, - c->activeIdxSize); - } else if (match > end) { - // If we had a local copy of the active mmbit, we could skip - // looking at this repeat again. But we don't, so we just move - // on. 
- } else if (match <= offset) { - if (match < offset) { - // New minimum offset. - offset = match; - } - found = 1; - } + loc, activeIdx); + set_matching(c, match, groups, matching, c->numGroups, i, + activeIdx, &offset, end); } } - const char hasMatch = found; - u64a newOffset = offset; - if (!c->pureExclusive) { + if (c->exclusive != PURE_EXCLUSIVE) { subCastleMatchLoop(c, full_state, stream_state, - end, loc, &newOffset); - - DEBUG_PRINTF("offset=%llu\n", newOffset); - if (mmbit_any(matching, c->numRepeats)) { - found = 1; - if (subCastleFireMatch(c, full_state, stream_state, - cb, ctx, newOffset) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } + end, loc, &offset); } - - if (!found) { + DEBUG_PRINTF("offset=%llu\n", offset); + if (!mmbit_any(matching, c->numRepeats)) { + DEBUG_PRINTF("no more matches\n"); break; - } else if (hasMatch && offset == newOffset) { - const struct SubCastle *sub = getSubCastle(c, activeIdx); - DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, activeIdx); - if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) { - DEBUG_PRINTF("caller told us to halt\n"); - return MO_HALT_MATCHING; - } } - loc = newOffset; + + if (subCastleFireMatch(c, full_state, stream_state, + cb, ctx, offset) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + loc = offset; } return MO_CONTINUE_MATCHING; @@ -564,7 +572,8 @@ char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, static really_inline char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin, const size_t end, size_t *loc) { - const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, buf + begin, buf + end); + const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, + buf + begin, buf + end); if (ptr == buf + end) { DEBUG_PRINTF("no escape found\n"); return 0; @@ -725,10 +734,11 @@ static really_inline void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) { DEBUG_PRINTF("clearing active repeats due to escape\n"); if (c->exclusive) { - partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize); + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + mmbit_clear(groups, c->numGroups); } - if (!c->pureExclusive) { + if (c->exclusive != PURE_EXCLUSIVE) { mmbit_clear(active, c->numRepeats); } } @@ -755,7 +765,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, return 1; } - u8 *active = (u8 *)q->streamState + c->activeIdxSize; // active multibit + u8 *active = (u8 *)q->streamState + c->activeOffset;// active multibit assert(q->cur + 1 < q->end); // require at least two items assert(q_cur_type(q) == MQE_START); @@ -769,14 +779,8 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, char found = 0; if (c->exclusive) { - const u32 activeIdx = partial_load_u32(q->streamState, - c->activeIdxSize); - if (activeIdx < c->numRepeats) { - found = 1; - } else if (c->pureExclusive) { - DEBUG_PRINTF("castle is dead\n"); - goto scan_done; - } + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + found = mmbit_any(groups, c->numGroups); } if (!found && !mmbit_any(active, c->numRepeats)) { @@ -842,10 +846,9 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, } if (c->exclusive) { - const u32 activeIdx = partial_load_u32(q->streamState, - c->activeIdxSize); - if (c->pureExclusive || activeIdx < c->numRepeats) { - return activeIdx < c->numRepeats; + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + if (mmbit_any_precise(groups, c->numGroups)) { + return 
1; } } @@ -905,7 +908,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) { assert(q_cur_type(q) == MQE_START); const struct Castle *c = getImplNfa(n); - u8 *active = (u8 *)q->streamState + c->activeIdxSize; + u8 *active = (u8 *)q->streamState + c->activeOffset; u64a end_offset = q_last_loc(q) + q->offset; s64a last_kill_loc = castleLastKillLoc(c, q); @@ -938,14 +941,9 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) { char found = 0; if (c->exclusive) { - const u32 activeIdx = partial_load_u32(q->streamState, - c->activeIdxSize); - if (activeIdx < c->numRepeats) { - found = 1; - } else if (c->pureExclusive) { - DEBUG_PRINTF("castle is dead\n"); - return 0; - } + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + found = mmbit_any_precise(groups, c->numGroups); + } if (!found && !mmbit_any_precise(active, c->numRepeats)) { @@ -988,11 +986,12 @@ char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) { const struct Castle *c = getImplNfa(n); assert(q->streamState); if (c->exclusive) { - partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize); + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + mmbit_clear(groups, c->numGroups); } - if (!c->pureExclusive) { - u8 *active = (u8 *)q->streamState + c->activeIdxSize; + if (c->exclusive != PURE_EXCLUSIVE) { + u8 *active = (u8 *)q->streamState + c->activeOffset; mmbit_clear(active, c->numRepeats); } return 0; @@ -1006,11 +1005,12 @@ char nfaExecCastle0_initCompressedState(const struct NFA *n, UNUSED u64a offset, const struct Castle *c = getImplNfa(n); if (c->exclusive) { - partial_store_u32(state, c->numRepeats, c->activeIdxSize); + u8 *groups = (u8 *)state + c->groupIterOffset; + mmbit_clear(groups, c->numGroups); } - if (!c->pureExclusive) { - u8 *active = (u8 *)state + c->activeIdxSize; + if (c->exclusive != PURE_EXCLUSIVE) { + u8 *active = (u8 *)state + c->activeOffset; mmbit_clear(active, c->numRepeats); } return 0; @@ -1041,16 +1041,19 @@ char nfaExecCastle0_queueCompressState(const struct NFA *n, const struct mq *q, const u64a offset = q->offset + loc; DEBUG_PRINTF("offset=%llu\n", offset); if (c->exclusive) { - const u32 activeIdx = partial_load_u32(q->streamState, - c->activeIdxSize); - if (activeIdx < c->numRepeats) { + u8 *active = (u8 *)q->streamState; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("packing state for sub %u\n", activeIdx); subCastleQueueCompressState(c, activeIdx, q, offset); } } - if (!c->pureExclusive) { - const u8 *active = (const u8 *)q->streamState + c->activeIdxSize; + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)q->streamState + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { DEBUG_PRINTF("packing state for sub %u\n", i); @@ -1084,15 +1087,19 @@ char nfaExecCastle0_expandState(const struct NFA *n, void *dest, const struct Castle *c = getImplNfa(n); if (c->exclusive) { - const u32 activeIdx = partial_load_u32(src, c->activeIdxSize); - if (activeIdx < c->numRepeats) { + const u8 *active = (const u8 *)src; + const u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = 
mmbit_iterate(groups, c->numGroups, i)) { + const u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); subCastleExpandState(c, activeIdx, dest, src, offset); } } - if (!c->pureExclusive) { + if (c->exclusive != PURE_EXCLUSIVE) { // Unpack state for all active repeats. - const u8 *active = (const u8 *)src + c->activeIdxSize; + const u8 *active = (const u8 *)src + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { subCastleExpandState(c, i, dest, src, offset); diff --git a/src/nfa/castle_internal.h b/src/nfa/castle_internal.h index 54578d67c..429c232ff 100644 --- a/src/nfa/castle_internal.h +++ b/src/nfa/castle_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,7 +42,9 @@ struct SubCastle { u32 streamStateOffset; //!< offset within stream state u32 repeatInfoOffset; //!< offset of RepeatInfo structure // relative to the start of SubCastle - char exclusive; //!< exclusive info of this SubCastle + u32 exclusiveId; //!< exclusive group id of this SubCastle, + // set to the number of SubCastles in Castle + // if it is not exclusive }; #define CASTLE_DOT 0 @@ -51,6 +53,12 @@ struct SubCastle { #define CASTLE_SHUFTI 3 #define CASTLE_TRUFFLE 4 +enum ExclusiveType { + NOT_EXCLUSIVE, //!< no subcastles are exclusive + EXCLUSIVE, //!< a subset of subcastles are exclusive + PURE_EXCLUSIVE //!< all subcastles are exclusive +}; + /** * \brief Castle engine structure. * @@ -66,26 +74,56 @@ struct SubCastle { * - sparse iterator for subcastles that may be stale * * Castle stores an "active repeats" multibit in stream state, followed by the - * packed repeat state for each SubCastle. If all SubCastles are mutual - * exclusive, we store current active SubCastle id instead of "active repeats" - * multibit in stream state. If there are both exclusive and non-exclusive - * SubCastle groups, we use an active id for the exclusive group and a multibit - * for the non-exclusive group. + * packed repeat state for each SubCastle. If there are both exclusive and + * non-exclusive SubCastle groups, we use an active id for each exclusive group + * and a multibit for the non-exclusive group. We also store an "active + * exclusive groups" multibit for exclusive groups. If all SubCastles are mutual + * exclusive, we remove "active repeats" multibit from stream state. + * * Castle stream state: + * * + * * |---| + * * | | active subengine id for exclusive group 1 + * * |---| + * * | | active subengine id for exclusive group 2(if necessary) + * * |---| + * * ... + * * |---| + * * | | "active repeats" multibit for non-exclusive subcastles + * * | | (if not all subcastles are exclusive) + * * |---| + * * | | active multibit for exclusive groups + * * | | + * * |---| + * * ||-|| common pool of stream state for exclusive group 1 + * * ||-|| + * * |---| + * * ||-|| common pool of stream state for exclusive group 2(if necessary) + * * ||-|| + * * |---| + * * ... + * * |---| + * * | | stream state for each non-exclusive subcastles + * * ... + * * | | + * * |---| * * In full state (stored in scratch space) it stores a temporary multibit over * the repeats (used by \ref castleMatchLoop), followed by the repeat control - * blocks for each SubCastle. 
If all SubCastles are mutual exclusive, we only - * need to store the repeat control blocks for each SubCastle. + * blocks for each SubCastle. */ struct ALIGN_AVX_DIRECTIVE Castle { - u32 numRepeats; - u8 type; //!< tells us which scanning mechanism (below) to use - char exclusive; //!< tells us if there are mutual exclusive SubCastles - char pureExclusive; //!< tells us if all SubCastles are mutual exclusive - u8 activeIdxSize; //!< number of bytes in stream state to store - // active SubCastle id for exclusive mode - u32 staleIterOffset; // removeClique(CliqueGraph &cg) { // the end locations where it overlaps with other literals, // then the literals are mutual exclusive static -bool findExclusivePair(const u32 id1, const u32 id2, +bool findExclusivePair(const size_t id1, const size_t id2, + const size_t lower, const vector> &min_reset_dist, const vector>> &triggers) { const auto &triggers1 = triggers[id1]; const auto &triggers2 = triggers[id2]; - for (u32 i = 0; i < triggers1.size(); ++i) { - for (u32 j = 0; j < triggers2.size(); ++j) { + for (size_t i = 0; i < triggers1.size(); ++i) { + for (size_t j = 0; j < triggers2.size(); ++j) { if (!literalOverlap(triggers1[i], triggers2[j], - min_reset_dist[id2][j]) || + min_reset_dist[id2 - lower][j]) || !literalOverlap(triggers2[j], triggers1[i], - min_reset_dist[id1][i])) { + min_reset_dist[id1 - lower][i])) { return false; } } @@ -264,40 +265,75 @@ bool findExclusivePair(const u32 id1, const u32 id2, } static -vector checkExclusion(const CharReach &cr, - const vector>> &triggers) { - vector group; - if (!triggers.size() || triggers.size() == 1) { - return group; - } - - vector> min_reset_dist; - // get min reset distance for each repeat - for (auto it = triggers.begin(); it != triggers.end(); it++) { - const vector &tmp_dist = minResetDistToEnd(*it, cr); - min_reset_dist.push_back(tmp_dist); - } - - vector vertices; - unique_ptr cg = make_unique(); - for (u32 i = 0; i < triggers.size(); ++i) { - CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); - vertices.push_back(v); - } - - // find exclusive pair for each repeat - for (u32 i = 0; i < triggers.size(); ++i) { - CliqueVertex s = vertices[i]; - for (u32 j = i + 1; j < triggers.size(); ++j) { - if (findExclusivePair(i, j, min_reset_dist, triggers)) { - CliqueVertex d = vertices[j]; - add_edge(s, d, *cg); +vector> checkExclusion(u32 &streamStateSize, + const CharReach &cr, + const vector>> &triggers, + enum ExclusiveType &exclusive, + const size_t numRepeats) { + vector> groups; + size_t trigSize = triggers.size(); + DEBUG_PRINTF("trigSize %zu\n", trigSize); + + size_t lower = 0; + size_t total = 0; + while (lower < trigSize) { + vector vertices; + unique_ptr cg = make_unique(); + + vector> min_reset_dist; + size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize); + // get min reset distance for each repeat + for (size_t i = lower; i < upper; i++) { + CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); + vertices.push_back(v); + + const vector &tmp_dist = + minResetDistToEnd(triggers[i], cr); + min_reset_dist.push_back(tmp_dist); + } + + // find exclusive pair for each repeat + for (size_t i = lower; i < upper; i++) { + CliqueVertex s = vertices[i - lower]; + for (size_t j = i + 1; j < upper; j++) { + if (findExclusivePair(i, j, lower, min_reset_dist, + triggers)) { + CliqueVertex d = vertices[j - lower]; + add_edge(s, d, *cg); + } } } + + // find the largest exclusive group + auto clique = removeClique(*cg); + size_t cliqueSize = clique.size(); + if (cliqueSize > 1) { + 
groups.push_back(clique); + exclusive = EXCLUSIVE; + total += cliqueSize; + } + + lower += CLIQUE_GRAPH_MAX_SIZE; } + DEBUG_PRINTF("clique size %lu, num of repeats %lu\n", + total, numRepeats); + if (total == numRepeats) { + exclusive = PURE_EXCLUSIVE; + streamStateSize = 0; + }; + + return groups; +} - // find the largest exclusive group - return removeClique(*cg); +namespace { +struct ExclusiveInfo { + + /** Mapping between top and exclusive group id */ + map groupId; + + /** Number of exclusive groups */ + u32 numGroups = 0; +}; } static @@ -306,10 +342,12 @@ void buildSubcastles(const CastleProto &proto, vector &subs, const vector> &repeatInfoPair, u32 &scratchStateSize, u32 &streamStateSize, u32 &tableSize, vector &tables, u32 &sparseRepeats, - const set &exclusiveGroup, vector &may_stale) { + const ExclusiveInfo &exclusiveInfo, + vector &may_stale) { u32 i = 0; - u32 maxStreamSize = 0; - bool exclusive = exclusiveGroup.size() > 1; + const auto &groupId = exclusiveInfo.groupId; + const auto &numGroups = exclusiveInfo.numGroups; + vector maxStreamSize(numGroups, 0); for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); it != ite; ++it, ++i) { const PureRepeat &pr = it->second; @@ -330,8 +368,9 @@ void buildSubcastles(const CastleProto &proto, vector &subs, RepeatInfo &info = infos[i]; // handle exclusive case differently - if (exclusive && exclusiveGroup.find(i) != exclusiveGroup.end()) { - maxStreamSize = MAX(maxStreamSize, rsi.packedCtrlSize); + if (contains(groupId, i)) { + u32 id = groupId.at(i); + maxStreamSize[id] = MAX(maxStreamSize[id], rsi.packedCtrlSize); } else { subScratchStateSize = verify_u32(sizeof(RepeatControl)); subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); @@ -366,25 +405,34 @@ void buildSubcastles(const CastleProto &proto, vector &subs, sub.report = *pr.reports.begin(); if (rtype == REPEAT_SPARSE_OPTIMAL_P) { - for (u32 j = 0; j < rsi.patchSize; j++) { - tables.push_back(rsi.table[j]); - } - sparseRepeats++; - patchSize[i] = rsi.patchSize; - tableSize += rsi.patchSize; + for (u32 j = 0; j < rsi.patchSize; j++) { + tables.push_back(rsi.table[j]); + } + sparseRepeats++; + patchSize[i] = rsi.patchSize; + tableSize += rsi.patchSize; } } - if (exclusive) { - for (auto k : exclusiveGroup) { - SubCastle &sub = subs[k]; - RepeatInfo &info = infos[k]; - info.packedCtrlSize = maxStreamSize; + vector scratchOffset(numGroups, 0); + vector streamOffset(numGroups, 0); + for (const auto &j : groupId) { + u32 top = j.first; + u32 id = j.second; + SubCastle &sub = subs[top]; + RepeatInfo &info = infos[top]; + info.packedCtrlSize = maxStreamSize[id]; + if (!scratchOffset[id]) { sub.fullStateOffset = scratchStateSize; sub.streamStateOffset = streamStateSize; + scratchOffset[id] = scratchStateSize; + streamOffset[id] = streamStateSize; + scratchStateSize += verify_u32(sizeof(RepeatControl)); + streamStateSize += maxStreamSize[id]; + } else { + sub.fullStateOffset = scratchOffset[id]; + sub.streamStateOffset = streamOffset[id]; } - scratchStateSize += verify_u32(sizeof(RepeatControl)); - streamStateSize += maxStreamSize; } } @@ -423,8 +471,9 @@ buildCastle(const CastleProto &proto, depth maxWidth(0); u32 i = 0; - vector candidateRepeats; + ExclusiveInfo exclusiveInfo; vector>> candidateTriggers; + vector candidateRepeats; vector> repeatInfoPair; for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); it != ite; ++it, ++i) { @@ -459,38 +508,40 @@ buildCastle(const CastleProto &proto, repeatInfoPair.push_back(make_pair(min_period, is_reset)); - 
if (is_reset && candidateRepeats.size() < CLIQUE_GRAPH_MAX_SIZE) { - candidateTriggers.push_back(triggers.at(top)); - candidateRepeats.push_back(i); - } + candidateTriggers.push_back(triggers.at(top)); + candidateRepeats.push_back(i); } // Case 1: exclusive repeats - bool exclusive = false; - bool pureExclusive = false; + enum ExclusiveType exclusive = NOT_EXCLUSIVE; u32 activeIdxSize = 0; - set exclusiveGroup; + u32 groupIterOffset = 0; if (cc.grey.castleExclusive) { - vector tmpGroup = checkExclusion(cr, candidateTriggers); - const u32 exclusiveSize = tmpGroup.size(); - if (exclusiveSize > 1) { - // Case 1: mutual exclusive repeats group found, initialize state - // sizes - exclusive = true; + auto cliqueGroups = + checkExclusion(streamStateSize, cr, candidateTriggers, + exclusive, numRepeats); + for (const auto &group : cliqueGroups) { + // mutual exclusive repeats group found, + // update state sizes activeIdxSize = calcPackedBytes(numRepeats + 1); - if (exclusiveSize == numRepeats) { - pureExclusive = true; - streamStateSize = 0; - scratchStateSize = 0; - } streamStateSize += activeIdxSize; // replace with top values - for (const auto &val : tmpGroup) { - exclusiveGroup.insert(candidateRepeats[val]); + for (const auto &val : group) { + const u32 top = candidateRepeats[val]; + exclusiveInfo.groupId[top] = exclusiveInfo.numGroups; } + exclusiveInfo.numGroups++; + } + + if (exclusive) { + groupIterOffset = streamStateSize; + streamStateSize += mmbit_size(exclusiveInfo.numGroups); } + + DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups); } + candidateRepeats.clear(); DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(), exclusive); @@ -501,7 +552,7 @@ buildCastle(const CastleProto &proto, buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair, scratchStateSize, streamStateSize, tableSize, - tables, sparseRepeats, exclusiveGroup, may_stale); + tables, sparseRepeats, exclusiveInfo, may_stale); DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size()); vector stale_iter; @@ -536,9 +587,11 @@ buildCastle(const CastleProto &proto, char *ptr = base_ptr; Castle *c = (Castle *)ptr; c->numRepeats = verify_u32(subs.size()); - c->exclusive = exclusive; - c->pureExclusive = pureExclusive; + c->numGroups = exclusiveInfo.numGroups; + c->exclusive = verify_s8(exclusive); c->activeIdxSize = verify_u8(activeIdxSize); + c->activeOffset = verify_u32(c->numGroups * activeIdxSize); + c->groupIterOffset = groupIterOffset; writeCastleScanEngine(cr, c); @@ -572,10 +625,10 @@ buildCastle(const CastleProto &proto, } // set exclusive group info - if (exclusiveGroup.find(i) != exclusiveGroup.end()) { - sub->exclusive = 1; + if (contains(exclusiveInfo.groupId, i)) { + sub->exclusiveId = exclusiveInfo.groupId[i]; } else { - sub->exclusive = 0; + sub->exclusiveId = numRepeats; } } From 90ea5b601014139c41c11cd296c27b5a13a53d77 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 29 Feb 2016 08:25:02 +1100 Subject: [PATCH 095/218] Correct asserts --- src/nfa/nfa_internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index e13482b5f..089e96838 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -245,14 +245,14 @@ int isMultiTopType(u8 t) { /* 
Use for functions that return an integer. */ #define NFA_API_NO_IMPL(...) \ ({ \ - assert("not implemented for this engine!"); \ + assert(!"not implemented for this engine!"); \ 0; /* return value, for places that need it */ \ }) /* Use for _zombie_status functions. */ #define NFA_API_ZOMBIE_NO_IMPL(...) \ ({ \ - assert("not implemented for this engine!"); \ + assert(!"not implemented for this engine!"); \ NFA_ZOMBIE_NO; \ }) From ec223e362260a60982391f931737709bb2f2ff2e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 29 Feb 2016 17:06:31 +1100 Subject: [PATCH 096/218] Always init NFAContext::cached_br Now that it's on stack, this should be initialised. Silences a warning from valgrind. --- src/nfa/limex_runtime_impl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 011913f94..676ed3700 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -537,6 +537,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { ctx.callback = q->cb; ctx.context = q->context; STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_br = 0; assert(q->items[q->cur].location >= 0); DEBUG_PRINTF("LOAD STATE\n"); @@ -631,6 +632,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { ctx.callback = q->cb; ctx.context = q->context; STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); STORE_STATE(&ctx.s, LOAD_STATE(q->state)); @@ -722,6 +724,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, ctx.callback = NULL; ctx.context = NULL; STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); STORE_STATE(&ctx.s, LOAD_STATE(q->state)); @@ -821,6 +824,7 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, ctx.callback = cb; ctx.context = context; STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_br = 0; const IMPL_NFA_T *limex = getImplNfa(n); STORE_STATE(&ctx.s, INITIAL_FN(limex, 0)); // always anchored From 5537f9da786e00f9842f3dbe580e85cbeb8d2f8b Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 1 Mar 2016 10:56:22 +1100 Subject: [PATCH 097/218] Install pkgconfig file everywhere but Windows --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ae459d84..5fb946eb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -340,8 +340,7 @@ endif() configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h) configure_file(src/hs_version.h.in ${PROJECT_BINARY_DIR}/hs_version.h) -if (PKG_CONFIG_FOUND) - # we really only need to do this if we have pkg-config +if (NOT WIN32) configure_file(libhs.pc.in libhs.pc @ONLY) # only replace @ quoted vars install(FILES ${CMAKE_BINARY_DIR}/libhs.pc DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/pkgconfig") From 82438a567f028e43b8967bfaa1b8df1d1d7d8de4 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 1 Mar 2016 10:56:58 +1100 Subject: [PATCH 098/218] Fix typo --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5fb946eb4..77af34a56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,7 +75,7 @@ if(NOT Boost_FOUND) set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include") find_package(Boost ${BOOST_MINVERSION}) if(NOT Boost_FOUND) - message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. 
Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.") + message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.") endif() endif() From 78b780c550c1eb1bf89df5d9ef2d32db2e465e6b Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 1 Mar 2016 11:04:09 +1100 Subject: [PATCH 099/218] Don't enable -Werror for release builds Using -Werror is a very good thing during development, but it has the potential to break the user's build, for example, if they are using a slightly different compiler combination to one that has been tested previously. --- CMakeLists.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 77af34a56..49c9a4add 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,8 +171,14 @@ else() endif() # set compiler flags - more are tested and added later - set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -Werror") - set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Werror -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor") + set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual") + set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor") + if (NOT RELEASE_BUILD) + # -Werror is most useful during development, don't potentially break + # release builds + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror") + endif() if (NOT CMAKE_C_FLAGS MATCHES .*march.*) message(STATUS "Building for current host CPU") From 60dcd0c160c19cb60df7da6ce2e35ba69f879971 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 12 Nov 2015 10:52:48 +1100 Subject: [PATCH 100/218] Remove CMake tests for pthread barriers --- cmake/config.h.in | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cmake/config.h.in b/cmake/config.h.in index 63e0afc22..9c8f8a275 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -39,10 +39,6 @@ /* C compiler has intrin.h */ #cmakedefine HAVE_C_INTRIN_H -/* Define to 1 if you have the declaration of `pthread_barrier_init', and to 0 - if you don't. */ -#cmakedefine HAVE_DECL_PTHREAD_BARRIER_INIT - /* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to 0 if you don't. 
*/ #cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP From 4e80d22d790492fe8143ca20e956d2f589ce3cbe Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 17 Feb 2016 16:54:45 +1100 Subject: [PATCH 101/218] Use using directives to silence hiding warning --- src/parser/check_refs.cpp | 4 +++- src/parser/prefilter.cpp | 9 ++++++++- src/parser/unsupported.cpp | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/parser/check_refs.cpp b/src/parser/check_refs.cpp index ad81ae760..fae68f745 100644 --- a/src/parser/check_refs.cpp +++ b/src/parser/check_refs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,6 +73,8 @@ class ReferenceVisitor: public DefaultConstComponentVisitor { throw ParseError(str.str()); } + using DefaultConstComponentVisitor::pre; + void pre(const ComponentBackReference &c) override { if (c.ref_id) { if (c.ref_id >= num_ids) { diff --git a/src/parser/prefilter.cpp b/src/parser/prefilter.cpp index f5a0c66c8..ea58a134f 100644 --- a/src/parser/prefilter.cpp +++ b/src/parser/prefilter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -80,6 +80,9 @@ class SafeReferentVisitor : public DefaultConstComponentVisitor { return numPositions <= MAX_REFERENT_POSITIONS; } + using DefaultConstComponentVisitor::pre; + using DefaultConstComponentVisitor::post; + void pre(const AsciiComponentClass &) override { numPositions++; } @@ -164,6 +167,8 @@ class FindSequenceVisitor : public DefaultConstComponentVisitor { explicit FindSequenceVisitor(unsigned ref_id) : id(ref_id) {} explicit FindSequenceVisitor(const std::string &s) : name(s) {} + using DefaultConstComponentVisitor::pre; + void pre(const ComponentSequence &c) override { if (!name.empty()) { if (c.getCaptureName() == name) { @@ -203,6 +208,8 @@ class PrefilterVisitor : public DefaultComponentVisitor { PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {} ~PrefilterVisitor() override; + using DefaultComponentVisitor::visit; + /** \brief Calls the visitor (recursively) on a new replacement component * we've just created. Takes care of freeing it if the sequence is itself * replaced. 
*/ diff --git a/src/parser/unsupported.cpp b/src/parser/unsupported.cpp index c97a57503..c4b18b6a3 100644 --- a/src/parser/unsupported.cpp +++ b/src/parser/unsupported.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,6 +45,7 @@ namespace ue2 { class UnsupportedVisitor : public DefaultConstComponentVisitor { public: ~UnsupportedVisitor() override; + using DefaultConstComponentVisitor::pre; void pre(const ComponentAssertion &) override { throw ParseError("Zero-width assertions are not supported."); } From 1bc12139a2385bc7c5a70d971cc71069e21a64ea Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 17 Feb 2016 17:00:30 +1100 Subject: [PATCH 102/218] ComponentCondReference: mark ctors explicit --- src/parser/ComponentCondReference.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/ComponentCondReference.h b/src/parser/ComponentCondReference.h index 9d53b2a82..c0ee9ac3a 100644 --- a/src/parser/ComponentCondReference.h +++ b/src/parser/ComponentCondReference.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,9 +46,9 @@ class ComponentCondReference : public ComponentSequence { friend class ReferenceVisitor; friend class PrintVisitor; public: - ComponentCondReference(unsigned ref); - ComponentCondReference(const std::string &name); - ComponentCondReference(std::unique_ptr c); + explicit ComponentCondReference(unsigned ref); + explicit ComponentCondReference(const std::string &name); + explicit ComponentCondReference(std::unique_ptr c); ~ComponentCondReference() override; ComponentCondReference *clone() const override; From b58d05dfecb1cefc1055aba97d9cb9fcd2de85fc Mon Sep 17 00:00:00 2001 From: Mohammad Abdul Awal Date: Tue, 1 Mar 2016 16:06:00 +0000 Subject: [PATCH 103/218] Fixed some DEBUG_PRINTF format strings.
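As a hedged aside (the snippet below is illustrative, not code from this patch): the values printed here are size_t, e.g. the results of num_vertices() and std::vector::size(), and the portable printf conversion for size_t is %zu. %lu is correct only on platforms where size_t happens to be unsigned long, and a mismatch draws -Wformat warnings, or undefined behaviour on LLP64 targets such as 64-bit Windows.

    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3};
        // Portable: %zu is the conversion specifier for size_t.
        std::printf("clique size:%zu\n", v.size());
        // Not portable: valid only where size_t == unsigned long; this is
        // the pattern the patch replaces.
        // std::printf("clique size:%lu\n", v.size());
        return 0;
    }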
--- src/nfa/castlecompile.cpp | 6 +++--- src/nfagraph/ng_equivalence.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 67d9b3d48..5e8b662af 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -205,7 +205,7 @@ bool graph_empty(const Graph &g) { static vector removeClique(CliqueGraph &cg) { vector> cliquesVec(1); - DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg)); + DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg)); findCliqueGroup(cg, cliquesVec[0]); while (!graph_empty(cg)) { const vector &c = cliquesVec.back(); @@ -237,7 +237,7 @@ vector removeClique(CliqueGraph &cg) { } } - DEBUG_PRINTF("clique size:%lu\n", cliquesVec[id].size()); + DEBUG_PRINTF("clique size:%zu\n", cliquesVec[id].size()); return cliquesVec[id]; } @@ -315,7 +315,7 @@ vector> checkExclusion(u32 &streamStateSize, lower += CLIQUE_GRAPH_MAX_SIZE; } - DEBUG_PRINTF("clique size %lu, num of repeats %lu\n", + DEBUG_PRINTF("clique size %zu, num of repeats %zu\n", total, numRepeats); if (total == numRepeats) { exclusive = PURE_EXCLUSIVE; diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index 148cad493..b8e5a8d6d 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -377,7 +377,7 @@ void partitionGraph(ptr_vector &infos, ClassMap &classes, work_queue.push(eq_class); } } - DEBUG_PRINTF("partitioned, %lu equivalence classes\n", classinfomap.size()); + DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classinfomap.size()); } // generalized equivalence processing (left and right) From 67b9784dae58cf655be60c709de9b53ed4aad58e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 18 Feb 2016 09:45:37 +1100 Subject: [PATCH 104/218] Rose: use program for all literal matches Unifies all literal match paths so that the Rose program is used for all of them. This removes the previous specialised "direct report" and "multi direct report" paths. Some additional REPORT instruction work was necessary for this. Reworked literal construction path at compile time in prep for using program offsets as literal IDs. Completely removed the anchored log runtime, which is no longer worth the extra complexity. 
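A rough sketch of the resulting shape, in deliberately simplified form (Op, Instr and runProgram below are inventions for illustration, not Rose's actual bytecode or API): every literal match now runs a small interpreted program, so a plain direct report is just the degenerate program containing a single REPORT instruction. In this model, extra work such as catchup would be further instructions ahead of the REPORT rather than a separate runtime path.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Illustrative miniature only: a literal's work as a tiny instruction
    // list, dispatched by a simple interpreter loop.
    enum class Op : uint8_t { REPORT, END };

    struct Instr {
        Op op;
        uint32_t onmatch; // external report id, used by REPORT
    };

    static void runProgram(const std::vector<Instr> &prog, uint64_t end) {
        for (const Instr &i : prog) {
            switch (i.op) {
            case Op::REPORT:
                std::printf("report %u at offset %llu\n", (unsigned)i.onmatch,
                            (unsigned long long)end);
                break;
            case Op::END:
                return;
            }
        }
    }

    int main() {
        // A direct-report literal becomes this trivial two-instruction program.
        runProgram({{Op::REPORT, 42u}, {Op::END, 0u}}, 100);
        return 0;
    }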
--- src/report.h | 201 ++++++++-------- src/rose/block.c | 6 +- src/rose/catchup.c | 220 +---------------- src/rose/catchup.h | 170 ++++++------- src/rose/eod.c | 6 +- src/rose/infix.h | 1 + src/rose/match.c | 203 ++++------------ src/rose/match.h | 20 +- src/rose/program_runtime.h | 92 ++++--- src/rose/rose.h | 7 + src/rose/rose_build_anchored.cpp | 69 +++--- src/rose/rose_build_anchored.h | 19 +- src/rose/rose_build_bytecode.cpp | 396 +++++++++++++++++++------------ src/rose/rose_build_compile.cpp | 69 +----- src/rose/rose_build_dump.cpp | 9 - src/rose/rose_build_impl.h | 18 -- src/rose/rose_build_misc.cpp | 1 - src/rose/rose_dump.cpp | 20 +- src/rose/rose_internal.h | 43 +--- src/rose/rose_program.h | 18 +- src/rose/runtime.h | 12 - src/rose/stream.c | 11 +- src/runtime.c | 187 +++++---------- src/scratch.c | 34 +-- src/scratch.h | 11 - src/scratch_dump.cpp | 6 - 26 files changed, 677 insertions(+), 1172 deletions(-) diff --git a/src/report.h b/src/report.h index 69497da87..96cea32e9 100644 --- a/src/report.h +++ b/src/report.h @@ -68,14 +68,14 @@ enum DedupeResult { static really_inline enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, - const struct internal_report *ri, + const struct internal_report *ir, struct hs_scratch *scratch, u64a offset, u64a from_offset, u64a to_offset, const char do_som) { DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u, do_som=%d\n", offset, - from_offset, to_offset, ri->dkey, do_som); - DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom); - const u32 dkey = ri->dkey; + from_offset, to_offset, ir->dkey, do_som); + DEBUG_PRINTF("report type=%u, quashSom=%d\n", ir->type, ir->quashSom); + const u32 dkey = ir->dkey; // We should not have been called if there's no dedupe work to do. assert(do_som || dkey != MO_INVALID_IDX); @@ -99,8 +99,8 @@ enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, if (dkey != MO_INVALID_IDX) { const u32 dkeyCount = rose->dkeyCount; - const s32 offset_adj = ri->offsetAdjust; - if (ri->type == EXTERNAL_CALLBACK || ri->quashSom) { + const s32 offset_adj = ir->offsetAdjust; + if (ir->type == EXTERNAL_CALLBACK || ir->quashSom) { DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); assert(offset_adj == 0 || offset_adj == -1); if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) { @@ -136,12 +136,12 @@ enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, static really_inline enum DedupeResult dedupeCatchupSom(const struct RoseEngine *rose, - const struct internal_report *ri, + const struct internal_report *ir, struct hs_scratch *scratch, u64a offset, u64a from_offset, u64a to_offset) { DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u\n", offset, - from_offset, to_offset, ri->dkey); - DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom); + from_offset, to_offset, ir->dkey); + DEBUG_PRINTF("report type=%u, quashSom=%d\n", ir->type, ir->quashSom); struct match_deduper *deduper = &scratch->deduper; if (offset != deduper->current_report_offset) { @@ -160,11 +160,11 @@ enum DedupeResult dedupeCatchupSom(const struct RoseEngine *rose, deduper->current_report_offset = offset; } - const u32 dkey = ri->dkey; + const u32 dkey = ir->dkey; if (dkey != MO_INVALID_IDX) { const u32 dkeyCount = rose->dkeyCount; - const s32 offset_adj = ri->offsetAdjust; - if (ri->quashSom) { + const s32 offset_adj = ir->offsetAdjust; + if (ir->quashSom) { DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); assert(offset_adj == 0 || offset_adj == -1); if 
(fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) { @@ -208,11 +208,11 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, struct core_info *ci = &scratch->core_info; const struct RoseEngine *rose = ci->rose; DEBUG_PRINTF("internal report %u\n", id); - const struct internal_report *ri = getInternalReport(rose, id); + const struct internal_report *ir = getInternalReport(rose, id); - assert(isExternalReport(ri)); /* only external reports should reach here */ + assert(isExternalReport(ir)); /* only external reports should reach here */ - s32 offset_adj = ri->offsetAdjust; + s32 offset_adj = ir->offsetAdjust; u64a to_offset = offset; u64a from_offset = 0; @@ -225,7 +225,7 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, #endif DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch, + "offsetAdj=%d\n", offset, id, ir->type, ir->onmatch, offset_adj); if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */ @@ -233,46 +233,46 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, return MO_HALT_MATCHING; } - if (!is_simple && ri->hasBounds) { - assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); - assert(ri->minOffset <= ri->maxOffset); - if (offset < ri->minOffset || offset > ri->maxOffset) { + if (!is_simple && ir->hasBounds) { + assert(ir->minOffset || ir->minLength || ir->maxOffset < MAX_OFFSET); + assert(ir->minOffset <= ir->maxOffset); + if (offset < ir->minOffset || offset > ir->maxOffset) { DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", - offset, ri->minOffset, ri->maxOffset); + offset, ir->minOffset, ir->maxOffset); return ROSE_CONTINUE_MATCHING_NO_EXHAUST; } } - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { + if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ir->ekey))) { DEBUG_PRINTF("ate exhausted match\n"); return MO_CONTINUE_MATCHING; } - if (ri->type == EXTERNAL_CALLBACK) { + if (ir->type == EXTERNAL_CALLBACK) { from_offset = 0; } else if (do_som) { - from_offset = handleSomExternal(scratch, ri, to_offset); + from_offset = handleSomExternal(scratch, ir, to_offset); } to_offset += offset_adj; assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - if (do_som && ri->minLength) { - if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) { + if (do_som && ir->minLength) { + if (!satisfiesMinLength(ir->minLength, from_offset, to_offset)) { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; } - if (ri->quashSom) { + if (ir->quashSom) { from_offset = 0; } } DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ri->onmatch, ci->userContext); + from_offset, to_offset, ir->onmatch, ci->userContext); int halt = 0; - if (do_som || ri->dkey != MO_INVALID_IDX) { - enum DedupeResult dedupe_rv = dedupeCatchup(rose, ri, scratch, offset, + if (do_som || ir->dkey != MO_INVALID_IDX) { + enum DedupeResult dedupe_rv = dedupeCatchup(rose, ir, scratch, offset, from_offset, to_offset, do_som); switch (dedupe_rv) { case DEDUPE_HALT: @@ -286,7 +286,7 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, } } - halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, + halt = ci->userCallback((unsigned int)ir->onmatch, from_offset, to_offset, flags, ci->userContext); exit: if (halt) { @@ -295,8 +295,8 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, 
return MO_HALT_MATCHING; } - if (!is_simple && ri->ekey != END_EXHAUST) { - markAsMatched(ci->exhaustionVector, ri->ekey); + if (!is_simple && ir->ekey != END_EXHAUST) { + markAsMatched(ci->exhaustionVector, ir->ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; @@ -310,58 +310,52 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, * that dedupe catchup has been done. */ static really_inline -int roseDeliverReport(u64a offset, ReportID id, struct hs_scratch *scratch, - char is_exhaustible) { - assert(id != MO_INVALID_IDX); // Should never get an invalid ID. +int roseDeliverReport(u64a offset, UNUSED ReportID id, ReportID onmatch, + s32 offset_adjust, struct hs_scratch *scratch, u32 ekey) { assert(scratch); assert(scratch->magic == SCRATCH_MAGIC); struct core_info *ci = &scratch->core_info; - const struct RoseEngine *rose = ci->rose; - DEBUG_PRINTF("internal report %u\n", id); - const struct internal_report *ri = getInternalReport(rose, id); - - assert(isExternalReport(ri)); /* only external reports should reach here */ - const s32 offset_adj = ri->offsetAdjust; u32 flags = 0; #ifndef RELEASE_BUILD - if (offset_adj) { + if (offset_adjust) { // alert testing tools that we've got adjusted matches flags |= HS_MATCH_FLAG_ADJUSTED; } #endif - DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch, - offset_adj); +#ifndef NDEBUG + // Assertions for development builds. + UNUSED const struct internal_report *ir = getInternalReport(ci->rose, id); + assert(isExternalReport(ir)); /* only external reports should reach here */ assert(!can_stop_matching(scratch)); - assert(!ri->hasBounds || - (offset >= ri->minOffset && offset <= ri->maxOffset)); - assert(ri->type == EXTERNAL_CALLBACK); - assert(!ri->minLength); - assert(!ri->quashSom); - assert(ri->ekey == INVALID_EKEY || - !isExhausted(ci->exhaustionVector, ri->ekey)); + assert(!ir->hasBounds || + (offset >= ir->minOffset && offset <= ir->maxOffset)); + assert(ir->type == EXTERNAL_CALLBACK); + assert(!ir->minLength); + assert(!ir->quashSom); +#endif + + assert(ekey == INVALID_EKEY || !isExhausted(ci->exhaustionVector, ekey)); u64a from_offset = 0; - u64a to_offset = offset + offset_adj; + u64a to_offset = offset + offset_adjust; DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ri->onmatch, ci->userContext); + from_offset, to_offset, onmatch, ci->userContext); - int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, - to_offset, flags, ci->userContext); + int halt = ci->userCallback(onmatch, from_offset, to_offset, flags, + ci->userContext); if (halt) { DEBUG_PRINTF("callback requested to terminate matches\n"); ci->status |= STATUS_TERMINATED; return MO_HALT_MATCHING; } - if (is_exhaustible) { - assert(ri->ekey != INVALID_EKEY); - markAsMatched(ci->exhaustionVector, ri->ekey); + if (ekey != INVALID_EKEY) { + markAsMatched(ci->exhaustionVector, ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; @@ -379,62 +373,62 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, struct core_info *ci = &scratch->core_info; const struct RoseEngine *rose = ci->rose; - const struct internal_report *ri = getInternalReport(rose, id); + const struct internal_report *ir = getInternalReport(rose, id); /* internal events should be handled by rose directly */ - assert(ri->type == EXTERNAL_CALLBACK); + assert(ir->type == EXTERNAL_CALLBACK); 
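
The exhaustion handling above — isExhausted(), markAsMatched(), INVALID_EKEY — boils down to a one-shot bit per "highlander" report. The following standalone C sketch shows that idea in isolation; every name in it is invented for illustration, and the fixed-size byte array stands in for the real per-scratch exhaustion vector:

    #include <stdio.h>
    #include <string.h>

    #define INVALID_EKEY_SKETCH 0xffffffffu

    struct ekey_vector {
        unsigned char bits[16];          /* one bit per exhaustion key */
    };

    static int is_exhausted(const struct ekey_vector *ev, unsigned key) {
        return (ev->bits[key / 8] >> (key % 8)) & 1;
    }

    static void mark_as_matched(struct ekey_vector *ev, unsigned key) {
        ev->bits[key / 8] |= (unsigned char)(1u << (key % 8));
    }

    /* Deliver an external report unless its key has already fired. */
    static void deliver_report(struct ekey_vector *ev, unsigned onmatch,
                               unsigned ekey) {
        if (ekey != INVALID_EKEY_SKETCH && is_exhausted(ev, ekey)) {
            printf("report %u suppressed (exhausted)\n", onmatch);
            return;
        }
        printf("report %u fires\n", onmatch);
        if (ekey != INVALID_EKEY_SKETCH) {
            mark_as_matched(ev, ekey);   /* highlander: first match only */
        }
    }

    int main(void) {
        struct ekey_vector ev;
        memset(&ev, 0, sizeof(ev));
        deliver_report(&ev, 100, 7);                   /* fires, sets key 7 */
        deliver_report(&ev, 100, 7);                   /* suppressed */
        deliver_report(&ev, 200, INVALID_EKEY_SKETCH); /* never exhausts */
        return 0;
    }

Passing the key explicitly, as the refactored roseDeliverReport() above now does, lets the caller skip the internal_report lookup entirely on the fast path.
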
DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch, - ri->offsetAdjust); + "offsetAdj=%d\n", to_offset, id, ir->type, ir->onmatch, + ir->offsetAdjust); if (unlikely(can_stop_matching(scratch))) { DEBUG_PRINTF("pre broken - halting\n"); return MO_HALT_MATCHING; } - if (!is_simple && ri->hasBounds) { - assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); - if (to_offset < ri->minOffset || to_offset > ri->maxOffset) { + if (!is_simple && ir->hasBounds) { + assert(ir->minOffset || ir->minLength || ir->maxOffset < MAX_OFFSET); + if (to_offset < ir->minOffset || to_offset > ir->maxOffset) { DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", - to_offset, ri->minOffset, ri->maxOffset); + to_offset, ir->minOffset, ir->maxOffset); return MO_CONTINUE_MATCHING; } } int halt = 0; - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { + if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ir->ekey))) { DEBUG_PRINTF("ate exhausted match\n"); goto exit; } u64a offset = to_offset; - to_offset += ri->offsetAdjust; + to_offset += ir->offsetAdjust; assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - if (!is_simple && ri->minLength) { - if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) { + if (!is_simple && ir->minLength) { + if (!satisfiesMinLength(ir->minLength, from_offset, to_offset)) { return MO_CONTINUE_MATCHING; } - if (ri->quashSom) { + if (ir->quashSom) { from_offset = 0; } } DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ri->onmatch, ci->userContext); + from_offset, to_offset, ir->onmatch, ci->userContext); #ifndef RELEASE_BUILD - if (ri->offsetAdjust != 0) { + if (ir->offsetAdjust != 0) { // alert testing tools that we've got adjusted matches flags |= HS_MATCH_FLAG_ADJUSTED; } #endif enum DedupeResult dedupe_rv = - dedupeCatchupSom(rose, ri, scratch, offset, from_offset, to_offset); + dedupeCatchupSom(rose, ir, scratch, offset, from_offset, to_offset); switch (dedupe_rv) { case DEDUPE_HALT: halt = 1; @@ -446,11 +440,11 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, break; } - halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, + halt = ci->userCallback((unsigned int)ir->onmatch, from_offset, to_offset, flags, ci->userContext); if (!is_simple) { - markAsMatched(ci->exhaustionVector, ri->ekey); + markAsMatched(ci->exhaustionVector, ir->ekey); } exit: @@ -470,48 +464,41 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, * that dedupe catchup has been done. */ static really_inline -int roseDeliverSomReport(u64a from_offset, u64a to_offset, ReportID id, +int roseDeliverSomReport(u64a from_offset, u64a to_offset, + const struct internal_report *ir, struct hs_scratch *scratch, char is_exhaustible) { - assert(id != MO_INVALID_IDX); // Should never get an invalid ID. 
assert(scratch); assert(scratch->magic == SCRATCH_MAGIC); - - u32 flags = 0; + assert(isExternalReport(ir)); /* only external reports should reach here */ struct core_info *ci = &scratch->core_info; - const struct RoseEngine *rose = ci->rose; - const struct internal_report *ri = getInternalReport(rose, id); - - assert(isExternalReport(ri)); /* only external reports should reach here */ - DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch, - ri->offsetAdjust); + u32 flags = 0; +#ifndef RELEASE_BUILD + if (ir->offsetAdjust != 0) { + // alert testing tools that we've got adjusted matches + flags |= HS_MATCH_FLAG_ADJUSTED; + } +#endif assert(!can_stop_matching(scratch)); - assert(!ri->hasBounds || - (to_offset >= ri->minOffset && to_offset <= ri->maxOffset)); - assert(ri->ekey == INVALID_EKEY || - !isExhausted(ci->exhaustionVector, ri->ekey)); + assert(!ir->hasBounds || + (to_offset >= ir->minOffset && to_offset <= ir->maxOffset)); + assert(ir->ekey == INVALID_EKEY || + !isExhausted(ci->exhaustionVector, ir->ekey)); - to_offset += ri->offsetAdjust; + to_offset += ir->offsetAdjust; assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - assert(!ri->minLength || - satisfiesMinLength(ri->minLength, from_offset, to_offset)); - assert(!ri->quashSom || from_offset == 0); + assert(!ir->minLength || + satisfiesMinLength(ir->minLength, from_offset, to_offset)); + assert(!ir->quashSom || from_offset == 0); DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ri->onmatch, ci->userContext); + from_offset, to_offset, ir->onmatch, ci->userContext); -#ifndef RELEASE_BUILD - if (ri->offsetAdjust != 0) { - // alert testing tools that we've got adjusted matches - flags |= HS_MATCH_FLAG_ADJUSTED; - } -#endif - int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, + int halt = ci->userCallback((unsigned int)ir->onmatch, from_offset, to_offset, flags, ci->userContext); if (halt) { @@ -521,8 +508,8 @@ int roseDeliverSomReport(u64a from_offset, u64a to_offset, ReportID id, } if (is_exhaustible) { - assert(ri->ekey != INVALID_EKEY); - markAsMatched(ci->exhaustionVector, ri->ekey); + assert(ir->ekey != INVALID_EKEY); + markAsMatched(ci->exhaustionVector, ir->ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; diff --git a/src/rose/block.c b/src/rose/block.c index e081d3aec..853f1ead5 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -150,10 +150,7 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, tctxt->minMatchOffset = 0; tctxt->minNonMpvMatchOffset = 0; tctxt->next_mpv_offset = 0; - tctxt->curr_anchored_loc = MMB_INVALID; - tctxt->curr_row_offset = 0; - scratch->am_log_sum = 0; /* clear the anchored logs */ scratch->al_log_sum = 0; fatbit_clear(scratch->aqa); @@ -219,7 +216,6 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, goto exit; } - resetAnchoredLog(t, scratch); skip_atable:; } @@ -263,5 +259,5 @@ exit:; assert(!can_stop_matching(scratch)); - roseCatchUpTo(t, scratch, length, 0); + roseCatchUpTo(t, scratch, length); } diff --git a/src/rose/catchup.c b/src/rose/catchup.c index b84ca59c7..c740fe087 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -26,6 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/** + * \file + * \brief Rose runtime: code for catching up output-exposed engines. 
+ */ + #include "catchup.h" #include "match.h" #include "rose.h" @@ -53,7 +58,7 @@ int handleReportInternally(const struct RoseEngine *t, return 1; } if (ri->type == INTERNAL_ROSE_CHAIN) { - roseHandleChainMatch(t, scratch, id, offset, 0, 1); + roseHandleChainMatch(t, scratch, id, offset, 1); return 1; } @@ -80,66 +85,6 @@ int handleReportInternallyNoChain(const struct RoseEngine *t, return 0; } -static really_inline -void currentAnchoredMatch(const struct RoseEngine *t, - struct RoseContext *tctxt, ReportID *reportId, - u64a *end) { - if (tctxt->curr_anchored_loc == MMB_INVALID) { - *end = ANCHORED_MATCH_SENTINEL; - *reportId = ANCHORED_MATCH_SENTINEL; - DEBUG_PRINTF("curr %u [idx = %u] @%llu\n", *reportId, - tctxt->curr_row_offset, *end); - return; - } - - *end = tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1; - *reportId = getAnchoredMap(t)[tctxt->curr_row_offset]; - - DEBUG_PRINTF("curr %u [idx = %u] @%llu\n", *reportId, - tctxt->curr_row_offset, *end); -} - -static rose_inline -void nextAnchoredMatch(const struct RoseEngine *t, struct hs_scratch *scratch, - ReportID *reportId, u64a *end) { - struct RoseContext *tctxt = &scratch->tctxt; - assert(tctxt->curr_anchored_loc != MMB_INVALID); - - struct fatbit **anchoredRows = getAnchoredLog(scratch); - - u32 region_width = t->anchoredMatches; - struct fatbit *curr_row = anchoredRows[tctxt->curr_anchored_loc]; - - tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width, - tctxt->curr_row_offset); - DEBUG_PRINTF("next %u [idx = %u] @%llu\n", *reportId, - tctxt->curr_row_offset, *end); - if (tctxt->curr_row_offset != MMB_INVALID) { - *end = tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1; - *reportId = getAnchoredMap(t)[tctxt->curr_row_offset]; - return; - } - - tctxt->curr_anchored_loc = bf64_iterate(scratch->am_log_sum, - tctxt->curr_anchored_loc); - - if (tctxt->curr_anchored_loc == MMB_INVALID) { - *end = ANCHORED_MATCH_SENTINEL; - *reportId = ANCHORED_MATCH_SENTINEL; - return; - } - - assert(tctxt->curr_anchored_loc < scratch->anchored_region_len); - curr_row = anchoredRows[tctxt->curr_anchored_loc]; - - tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width, - MMB_INVALID); - assert(tctxt->curr_row_offset != MMB_INVALID); - - *end = tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1; - *reportId = getAnchoredMap(t)[tctxt->curr_row_offset]; -} - static really_inline void deactivateQueue(const struct RoseEngine *t, u8 *aa, u32 qi, struct hs_scratch *scratch) { @@ -767,7 +712,7 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, char alive = blast_queue(t, scratch, q, a_qi, second_place_loc, 0); - /* We have three posible outcomes: + /* We have three possible outcomes: * (1) the nfa died * (2) we completed the queue (implies that second_place_loc == final_loc) * (3) the queue ran to second_place_loc and stopped. 
In this case we need @@ -1089,119 +1034,7 @@ exit:; return HWLM_CONTINUE_MATCHING; } -static really_inline -hwlmcb_rv_t roseCatchUpNfasAndMpv(const struct RoseEngine *t, - s64a loc, s64a final_loc, - struct hs_scratch *scratch) { - hwlmcb_rv_t rv = roseCatchUpNfas(t, loc, final_loc, scratch); - - if (rv != HWLM_CONTINUE_MATCHING) { - return rv; - } - - return roseCatchUpMPV(t, loc, scratch); -} - - -static really_inline -hwlmcb_rv_t roseCatchUpAll_i(s64a loc, struct hs_scratch *scratch, - char do_full_mpv) { - const struct RoseEngine *t = scratch->core_info.rose; - assert(t->activeArrayCount); /* otherwise use roseCatchUpAnchoredOnly */ - struct RoseContext *tctxt = &scratch->tctxt; - u64a current_offset = scratch->core_info.buf_offset + loc; - - u64a anchored_end; - ReportID anchored_report; - currentAnchoredMatch(t, tctxt, &anchored_report, &anchored_end); - - DEBUG_PRINTF("am current_offset %llu\n", current_offset); - DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset); - DEBUG_PRINTF("min non mpv match offset %llu\n", - scratch->tctxt.minNonMpvMatchOffset); - - assert(current_offset > tctxt->minMatchOffset); - assert(anchored_end != ANCHORED_MATCH_SENTINEL); - - hwlmcb_rv_t rv = buildSufPQ(t, scratch->core_info.state, - anchored_end - scratch->core_info.buf_offset, - loc, scratch); - if (rv != HWLM_CONTINUE_MATCHING) { - return rv; - } - - /* buildSufPQ may have caught only part of the pq upto anchored_end */ - rv = roseCatchUpNfas(t, - anchored_end - scratch->core_info.buf_offset, loc, - scratch); - - if (rv != HWLM_CONTINUE_MATCHING) { - return rv; - } - - while (anchored_report != MO_INVALID_IDX - && anchored_end <= current_offset) { - if (anchored_end != tctxt->minMatchOffset) { - rv = roseCatchUpNfasAndMpv(t, - anchored_end - scratch->core_info.buf_offset, - loc, scratch); - if (rv != HWLM_CONTINUE_MATCHING) { - DEBUG_PRINTF("halting\n"); - return rv; - } - } - - assert(anchored_end == tctxt->minMatchOffset); - updateLastMatchOffset(tctxt, anchored_end); - - if (handleReportInternally(t, scratch, anchored_report, anchored_end)) { - goto next; - } - - if (tctxt->cb(anchored_end, anchored_report, scratch) - == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - next: - nextAnchoredMatch(t, scratch, &anchored_report, &anchored_end); - DEBUG_PRINTF("catch up %u %llu\n", anchored_report, anchored_end); - } - - if (current_offset == tctxt->minMatchOffset) { - DEBUG_PRINTF("caught up\n"); - assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue); - return HWLM_CONTINUE_MATCHING; - } - - rv = roseCatchUpNfas(t, loc, loc, scratch); - - if (rv != HWLM_CONTINUE_MATCHING) { - return rv; - } - - assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue - || rv == HWLM_TERMINATE_MATCHING); - - if (do_full_mpv) { - /* finish off any outstanding chained matches */ - rv = roseCatchUpMPV(t, loc, scratch); - } - - DEBUG_PRINTF("catchup all done %llu\n", current_offset); - - return rv; -} - hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch) { - return roseCatchUpAll_i(loc, scratch, 1); -} - -hwlmcb_rv_t roseCatchUpAnchoredAndSuf(s64a loc, struct hs_scratch *scratch) { - return roseCatchUpAll_i(loc, scratch, 0); -} - -hwlmcb_rv_t roseCatchUpSufAndChains(s64a loc, struct hs_scratch *scratch) { /* just need suf/outfixes and mpv */ DEBUG_PRINTF("loc %lld mnmmo %llu mmo %llu\n", loc, scratch->tctxt.minNonMpvMatchOffset, @@ -1248,42 +1081,3 @@ hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch) { return rv; } - 
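
The catch-up functions above all maintain one invariant: the user callback must see matches in non-decreasing offset order, even though suffix and outfix engines run lazily and each has its own backlog. Stripped of the real catchup_pq and NFA queues, the idea looks like this toy, where each engine exposes the offset of its next pending match and the earliest one is always serviced first (a linear scan standing in for the priority queue):

    #include <stdio.h>

    /* Toy engine: a list of pending match offsets. */
    struct toy_engine {
        const unsigned long long *matches;
        unsigned count, pos;
    };

    static unsigned long long next_match(const struct toy_engine *e) {
        return e->pos < e->count ? e->matches[e->pos] : ~0ULL;
    }

    /* Report all pending matches up to `loc`, in offset order, across all
     * engines. */
    static void catch_up_to(struct toy_engine *engines, unsigned n,
                            unsigned long long loc) {
        for (;;) {
            unsigned best = n;
            unsigned long long best_off = ~0ULL;
            for (unsigned i = 0; i < n; i++) {
                if (next_match(&engines[i]) < best_off) {
                    best_off = next_match(&engines[i]);
                    best = i;
                }
            }
            if (best == n || best_off > loc) {
                return; /* caught up to loc */
            }
            printf("match from engine %u at %llu\n", best, best_off);
            engines[best].pos++;
        }
    }

    int main(void) {
        const unsigned long long a[] = {2, 9}, b[] = {5, 7};
        struct toy_engine engines[2] = {{a, 2, 0}, {b, 2, 0}};
        catch_up_to(engines, 2, 8); /* prints offsets 2, 5, 7 in order */
        return 0;
    }
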
-hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch) { - const struct RoseEngine *t = scratch->core_info.rose; - struct RoseContext *tctxt = &scratch->tctxt; - - assert(!t->activeArrayCount); /* otherwise use roseCatchUpAll */ - - u64a current_offset = scratch->core_info.buf_offset + loc; - u64a anchored_end; - ReportID anchored_report; - currentAnchoredMatch(t, tctxt, &anchored_report, &anchored_end); - - DEBUG_PRINTF("am current_offset %llu\n", current_offset); - - assert(current_offset > tctxt->minMatchOffset); - - while (anchored_report != MO_INVALID_IDX - && anchored_end <= current_offset) { - updateLastMatchOffset(tctxt, anchored_end); - - /* as we require that there are no leaf nfas - there must be no nfa */ - if (handleReportInternallyNoChain(t, scratch, anchored_report, - anchored_end)) { - goto next; - } - - if (tctxt->cb(anchored_end, anchored_report, scratch) - == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - next: - nextAnchoredMatch(t, scratch, &anchored_report, &anchored_end); - DEBUG_PRINTF("catch up %u %llu\n", anchored_report, anchored_end); - } - - updateMinMatchOffset(tctxt, current_offset); - return HWLM_CONTINUE_MATCHING; -} diff --git a/src/rose/catchup.h b/src/rose/catchup.h index 65fd12c9d..910aa8da0 100644 --- a/src/rose/catchup.h +++ b/src/rose/catchup.h @@ -26,6 +26,25 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/** + * \file + * \brief Rose runtime: code for catching up output-exposed engines. + * + * Rose has several components which run behind the main (floating table) clock + * and need to be caught up before we report matches. + * + * Currently we have to deal with: + * 1. Suffix/Outfix NFAs + * 2. A single MPV NFA (chained), which may also be triggered by (1). + * + * The approach is to: + * - (A) build a priority queue of the suffix/outfixes based on their first + * match location; + * - (B) process the matches from the priority queue in order; + * - (C) As we report matches from (B) we interleave matches from the MPV if it + * exists. + */ + #ifndef ROSE_CATCHUP_H #define ROSE_CATCHUP_H @@ -35,43 +54,16 @@ #include "rose_common.h" #include "rose_internal.h" #include "ue2common.h" -#include "nfa/nfa_internal.h" -#include "util/bitutils.h" #include "util/multibit.h" -/* - * Rose has several components which run behind the main (floating table) clock - * and need to be caught up before we report matches. - * - * Currently we have to deal with: - * 1) Stored matches from the anchored matcher - * 2) Suffix/Outfix nfas - * 3) a single MPV nfa (chained) (which may also be triggered by (1) and (2)). - * - * The approach is to: - * A) build a priority queue of the suffix/outfixes based on their first match - * location - * B) process the matches from the anchored matches in order - * C) As we report a match from (B) we interleave matches from the suffixes - * D) As we report matches from (B) and (C) we interleave matches from the - * mpv if it exists. 
- */ /* Callbacks, defined in catchup.c */ -hwlmcb_rv_t roseCatchUpSufAndChains(s64a loc, struct hs_scratch *scratch); - hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch); -hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch); - - -/* will only catch mpv upto last reported external match */ +/* will only catch mpv up to last reported external match */ hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch); -/* will only catch mpv upto last reported external match */ -hwlmcb_rv_t roseCatchUpAnchoredAndSuf(s64a loc, struct hs_scratch *scratch); - hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, struct hs_scratch *scratch); @@ -81,44 +73,42 @@ void streamInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch); static really_inline -hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc, - struct hs_scratch *scratch) { - u64a cur_offset = loc + scratch->core_info.buf_offset; - assert(cur_offset >= scratch->tctxt.minMatchOffset); - - if (0) { - quick_exit: - updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset); - return HWLM_CONTINUE_MATCHING; - } - +int canSkipCatchUpMPV(const struct RoseEngine *t, struct hs_scratch *scratch, + u64a cur_offset) { if (!has_chained_nfas(t)) { - goto quick_exit; + return 1; } /* note: we may have to run at less than tctxt.minMatchOffset as we may * have a full queue of postponed events that we need to flush */ if (cur_offset < scratch->tctxt.next_mpv_offset) { - DEBUG_PRINTF("skipping cur_offset %lld min %lld, mpv %lld\n", + DEBUG_PRINTF("skipping cur_offset %llu min %llu, mpv %llu\n", cur_offset, scratch->tctxt.minMatchOffset, scratch->tctxt.next_mpv_offset); - goto quick_exit; + return 1; } assert(t->activeArrayCount); - DEBUG_PRINTF("cur offset offset: %lld\n", cur_offset); + DEBUG_PRINTF("cur offset offset: %llu\n", cur_offset); DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset); - DEBUG_PRINTF("roseCatchUpMPV to %lld\n", loc); - assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */ - u8 *aa = getActiveLeafArray(t, scratch->core_info.state); - u32 aaCount = t->activeArrayCount; + const u8 *aa = getActiveLeafArray(t, scratch->core_info.state); + return !mmbit_isset(aa, t->activeArrayCount, 0); +} + +/** \brief Catches up the MPV. */ +static really_inline +hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc, + struct hs_scratch *scratch) { + u64a cur_offset = loc + scratch->core_info.buf_offset; + assert(cur_offset >= scratch->tctxt.minMatchOffset); - if (!mmbit_isset(aa, aaCount, 0)){ - goto quick_exit; + if (canSkipCatchUpMPV(t, scratch, cur_offset)) { + updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset); + return HWLM_CONTINUE_MATCHING; } /* Note: chained tails MUST not participate in the priority queue as @@ -128,20 +118,10 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc, return roseCatchUpMPV_i(t, loc, scratch); } -static really_inline -u64a currentAnchoredEnd(const struct RoseEngine *t, struct RoseContext *tctxt) { - if (tctxt->curr_anchored_loc == MMB_INVALID) { - return ANCHORED_MATCH_SENTINEL; - } else { - return tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1; - } -} - -/* catches up nfas, anchored matches and the mpv */ +/** \brief Catches up NFAs and the MPV. 
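
canSkipCatchUpMPV() above is a cheap gate in front of the expensive roseCatchUpMPV_i() call: it bails out when no chained (MPV) work can possibly be outstanding. The same pattern in a standalone sketch, with invented struct fields standing in for has_chained_nfas(), next_mpv_offset and the active-array check:

    #include <stdio.h>

    struct engine_sketch {
        int has_chained_nfas;               /* compile-time property */
        unsigned long long next_mpv_offset; /* earliest offset MPV can fire */
        int mpv_active;                     /* is the MPV currently alive? */
    };

    /* Cheapest checks first; only fall through to the real catch-up work
     * when every early-out fails. */
    static int can_skip_mpv_catchup(const struct engine_sketch *t,
                                    unsigned long long cur_offset) {
        if (!t->has_chained_nfas) {
            return 1; /* no MPV built into this database at all */
        }
        if (cur_offset < t->next_mpv_offset) {
            return 1; /* MPV cannot produce a match this early */
        }
        return !t->mpv_active; /* MPV exists but is dormant */
    }

    int main(void) {
        struct engine_sketch t = {1, 100, 1};
        printf("%d\n", can_skip_mpv_catchup(&t, 50));  /* 1: too early */
        printf("%d\n", can_skip_mpv_catchup(&t, 150)); /* 0: must catch up */
        return 0;
    }

Hoisting this gate out of roseCatchUpMPV(), as the diff does, also lets the caller update its minimum-match bookkeeping in the skip case without entering the heavyweight path.
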
*/ static rose_inline hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - char in_anchored) { + struct hs_scratch *scratch, u64a end) { /* no need to catch up if we are at the same offset as last time */ if (end <= scratch->tctxt.minMatchOffset) { /* we must already be up to date */ @@ -158,24 +138,13 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, } assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset); - u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt); hwlmcb_rv_t rv; - if (in_anchored - || curr_anchored_end == ANCHORED_MATCH_SENTINEL - || curr_anchored_end > end) { - if (!t->activeArrayCount - || !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { - updateMinMatchOffset(&scratch->tctxt, end); - rv = HWLM_CONTINUE_MATCHING; - } else { - rv = roseCatchUpSufAndChains(loc, scratch); - } + if (!t->activeArrayCount + || !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { + updateMinMatchOffset(&scratch->tctxt, end); + rv = HWLM_CONTINUE_MATCHING; } else { - if (!t->activeArrayCount) { - rv = roseCatchUpAnchoredOnly(loc, scratch); - } else { - rv = roseCatchUpAll(loc, scratch); - } + rv = roseCatchUpAll(loc, scratch); } assert(rv != HWLM_CONTINUE_MATCHING @@ -185,13 +154,16 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, return rv; } -/* Catches up anything which may add triggers on the mpv: anchored matches - * and suf/outfixes. The MPV will be run only to intersperse matches in - * the output match stream if external matches are raised. */ +/** + * \brief Catches up anything which may add triggers on the MPV (suffixes and + * outfixes). + * + * The MPV will be run only to intersperse matches in the output match stream + * if external matches are raised. + */ static rose_inline hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - char in_anchored) { + struct hs_scratch *scratch, u64a end) { /* no need to catch up if we are at the same offset as last time */ if (end <= scratch->tctxt.minNonMpvMatchOffset) { /* we must already be up to date */ @@ -203,27 +175,21 @@ hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, assert(t->activeArrayCount); /* mpv is in active array */ assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset); - u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt); - if (in_anchored - || curr_anchored_end == ANCHORED_MATCH_SENTINEL - || curr_anchored_end > end) { - if (!t->mpvTriggeredByLeaf) { - /* no need to check as they never put triggers onto the mpv */ - return HWLM_CONTINUE_MATCHING; - } - - /* sadly, this branch rarely gets taken as the mpv itself is usually - * alive. */ - char *state = scratch->core_info.state; - if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { - scratch->tctxt.minNonMpvMatchOffset = end; - return HWLM_CONTINUE_MATCHING; - } - - return roseCatchUpSuf(loc, scratch); - } else { - return roseCatchUpAnchoredAndSuf(loc, scratch); + + if (!t->mpvTriggeredByLeaf) { + /* no need to check as they never put triggers onto the mpv */ + return HWLM_CONTINUE_MATCHING; + } + + /* sadly, this branch rarely gets taken as the mpv itself is usually + * alive. 
*/ + char *state = scratch->core_info.state; + if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { + scratch->tctxt.minNonMpvMatchOffset = end; + return HWLM_CONTINUE_MATCHING; } + + return roseCatchUpSuf(loc, scratch); } #endif diff --git a/src/rose/eod.c b/src/rose/eod.c index c6f9e09e6..24e9113d7 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -50,8 +50,6 @@ void initContext(const struct RoseEngine *t, char *state, u64a offset, tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; tctxt->next_mpv_offset = offset; - tctxt->curr_anchored_loc = MMB_INVALID; - tctxt->curr_row_offset = 0; scratch->catchup_pq.qm_size = 0; scratch->al_log_sum = 0; /* clear the anchored logs */ @@ -332,6 +330,10 @@ void roseEodExec(const struct RoseEngine *t, u64a offset, scratch->core_info.len, scratch->core_info.hbuf, scratch->core_info.hlen); + // We should not have been called if we've already been told to terminate + // matching. + assert(!told_to_stop_matching(scratch)); + if (t->maxBiAnchoredWidth != ROSE_BOUND_INF && offset > t->maxBiAnchoredWidth) { DEBUG_PRINTF("bailing, we are beyond max width\n"); diff --git a/src/rose/infix.h b/src/rose/infix.h index e3abc7fda..9cf9c0ad7 100644 --- a/src/rose/infix.h +++ b/src/rose/infix.h @@ -32,6 +32,7 @@ #include "ue2common.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_queue.h" +#include "nfa/nfa_internal.h" static really_inline int infixTooOld(struct mq *q, s64a curr_loc) { diff --git a/src/rose/match.c b/src/rose/match.c index d626950bc..f62a58242 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -90,19 +90,15 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); - if (isLiteralDR(id)) { - return tctx->groups; - } - - assert(id < t->literalCount); const u32 *delayRebuildPrograms = getByOffset(t, t->litDelayRebuildProgramOffset); - const u32 programOffset = delayRebuildPrograms[id]; + assert(id < t->literalCount); + const u32 program = delayRebuildPrograms[id]; - if (programOffset) { + if (program) { const size_t match_len = end - start + 1; UNUSED hwlmcb_rv_t rv = - roseRunProgram(t, scratch, programOffset, real_end, match_len, 0); + roseRunProgram(t, scratch, program, real_end, match_len, 0); assert(rv != HWLM_TERMINATE_MATCHING); } @@ -115,31 +111,8 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, static really_inline hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, s64a loc, - char in_anchored, char in_chained) { - return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_anchored, - in_chained); -} - -static rose_inline -void recordAnchoredMatch(const struct RoseEngine *t, struct hs_scratch *scratch, - ReportID reportId, u64a end) { - struct fatbit **anchoredRows = getAnchoredLog(scratch); - - DEBUG_PRINTF("record %u @ %llu\n", reportId, end); - assert(end - t->maxSafeAnchoredDROffset >= 1); - u32 adj_end = end - t->maxSafeAnchoredDROffset - 1; - DEBUG_PRINTF("adjusted location %u/%u\n", adj_end, - scratch->anchored_region_len); - - if (!bf64_set(&scratch->am_log_sum, adj_end)) { - // first time, clear row - fatbit_clear(anchoredRows[adj_end]); - } - - u32 idx = getAnchoredInverseMap(t)[reportId]; - DEBUG_PRINTF("record %u @ %llu index %u\n", reportId, end, idx); - assert(idx < t->anchoredMatches); - fatbit_set(anchoredRows[adj_end], t->anchoredMatches, idx); + char in_chained) { + return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained); } static rose_inline 
@@ -166,7 +139,7 @@ void recordAnchoredLiteralMatch(const struct RoseEngine *t, hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, struct hs_scratch *scratch, ReportID r, - u64a end, char in_anchored, char in_catchup) { + u64a end, char in_catchup) { struct core_info *ci = &scratch->core_info; u8 *aa = getActiveLeafArray(t, scratch->core_info.state); @@ -209,7 +182,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); /* we know it is a chained nfa and the suffixes/outfixes must already * be known to be consistent */ - if (ensureMpvQueueFlushed(t, scratch, qi, loc, in_anchored, in_catchup) + if (ensureMpvQueueFlushed(t, scratch, qi, loc, in_catchup) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -255,7 +228,7 @@ hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, ReportID id, u64a end, struct hs_scratch *scratch) { struct RoseContext *tctxt = &scratch->tctxt; - assert(end == tctxt->minMatchOffset); + assert(!t->needsCatchup || end == tctxt->minMatchOffset); DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end); updateLastMatchOffset(tctxt, end); @@ -272,38 +245,6 @@ hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, ReportID id, u64a end, return roseHaltIfExhausted(t, scratch); } -/* handles catchup, som, cb, etc */ -static really_inline -hwlmcb_rv_t roseHandleDirectReport(const struct RoseEngine *t, - struct hs_scratch *scratch, ReportID id, - u64a offset, char in_anchored) { - // The direct report path is only used for external reports. - assert(isExternalReport(getInternalReport(t, id))); - - if (roseCatchUpTo(t, scratch, offset, in_anchored) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - return roseHandleMatch(t, id, offset, scratch); -} - -static really_inline -hwlmcb_rv_t roseHandleAnchoredDirectReport(const struct RoseEngine *t, - struct hs_scratch *scratch, - u64a real_end, ReportID report) { - DEBUG_PRINTF("direct report %u, real_end=%llu\n", report, real_end); - - if (real_end > t->maxSafeAnchoredDROffset) { - DEBUG_PRINTF("match in overlapped anchored region --> stash\n"); - recordAnchoredMatch(t, scratch, report, real_end); - return HWLM_CONTINUE_MATCHING; - } - - return roseHandleDirectReport(t, scratch, report, real_end, - 1 /* in anchored */); -} - int roseAnchoredCallback(u64a end, u32 id, void *ctx) { struct RoseContext *tctxt = ctx; struct hs_scratch *scratch = tctxtToScratch(tctxt); @@ -320,7 +261,7 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { return MO_HALT_MATCHING; } - hwlmcb_rv_t rv = HWLM_CONTINUE_MATCHING; + const size_t match_len = 0; /* delayed literals need to be delivered before real literals; however * delayed literals only come from the floating table so if we are going @@ -329,46 +270,14 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { /* no history checks from anchored region and we are before the flush * boundary */ - if (isLiteralMDR(id)) { - // Multi-direct report, list of reports indexed by the ID. - u32 mdr_offset = id & ~LITERAL_MDR_FLAG; - const ReportID *report = - (const ReportID *)((const char *)t + t->multidirectOffset) + - mdr_offset; - for (; *report != MO_INVALID_IDX; report++) { - rv = roseHandleAnchoredDirectReport(t, scratch, real_end, *report); - if (rv == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } - } - return MO_CONTINUE_MATCHING; - } else if (isLiteralDR(id)) { - // Single direct report. 
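
With the direct-report paths removed, every literal match above funnels through the same dispatch: the literal ID indexes a table of program offsets, and a zero offset means the literal has no program to run. A toy version of that dispatch — strings stand in for real Rose bytecode here, and all names are illustrative:

    #include <stdio.h>

    #define LIT_COUNT 3

    /* Toy "bytecode": program offsets index into a blob of strings; in Rose
     * they index real instruction sequences. Offset 0 == no program. */
    static const char *blob[] = {"", "report A", "report B"};
    static const unsigned lit_programs[LIT_COUNT] = {1, 0, 2};

    static void on_literal_match(unsigned id, unsigned long long end) {
        if (id >= LIT_COUNT) {
            return;
        }
        unsigned program = lit_programs[id];
        if (!program) {
            return; /* literal has no work attached */
        }
        printf("literal %u at %llu -> run program: %s\n", id, end,
               blob[program]);
    }

    int main(void) {
        on_literal_match(0, 10); /* runs "report A" */
        on_literal_match(1, 11); /* no program */
        on_literal_match(2, 12); /* runs "report B" */
        return 0;
    }
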
- ReportID report = literalToReport(id); - rv = roseHandleAnchoredDirectReport(t, scratch, real_end, report); - if (rv == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } - return MO_CONTINUE_MATCHING; - } - - assert(id < t->literalCount); - const u32 *programs = getByOffset(t, t->litProgramOffset); - const u32 programOffset = programs[id]; - assert(programOffset); - - // Anchored literals are never delayed. - assert(!((const u32 *)getByOffset(t, t->litDelayRebuildProgramOffset))[id]); - - DEBUG_PRINTF("literal id=%u\n", id); - if (real_end <= t->floatingMinLiteralMatchOffset) { roseFlushLastByteHistory(t, scratch, real_end); tctxt->lastEndOffset = real_end; } - const size_t match_len = 0; - if (roseRunProgram(t, scratch, programOffset, real_end, match_len, 1) == + const u32 *programs = getByOffset(t, t->litProgramOffset); + assert(id < t->literalCount); + if (roseRunProgram(t, scratch, programs[id], real_end, match_len, 1) == HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(scratch)); DEBUG_PRINTF("caller requested termination\n"); @@ -387,65 +296,15 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { // Rose match-processing workhorse /* assumes not in_anchored */ static really_inline -hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - size_t match_len, u32 id, char in_delay_play, - char in_anch_playback) { +hwlmcb_rv_t roseProcessMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end, + size_t match_len, u32 id) { DEBUG_PRINTF("id=%u\n", id); - - if (!in_anch_playback && !in_delay_play) { - if (isLiteralMDR(id)) { - // Multi-direct report, list of reports indexed by the ID. - u32 mdr_offset = id & ~LITERAL_MDR_FLAG; - const ReportID *report = - (const ReportID *)((const char *)t + t->multidirectOffset) + - mdr_offset; - for (; *report != MO_INVALID_IDX; report++) { - DEBUG_PRINTF("handle multi-direct report %u\n", *report); - hwlmcb_rv_t rv = roseHandleDirectReport(t, scratch, *report, - end, 0 /* in anchored */); - if (rv == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - return HWLM_CONTINUE_MATCHING; - } else if (isLiteralDR(id)) { - // Single direct report. 
- ReportID report = literalToReport(id); - DEBUG_PRINTF("handle direct report %u\n", report); - return roseHandleDirectReport(t, scratch, report, end, - 0 /* in anchored */); - } - } - - assert(id < t->literalCount); const u32 *programs = getByOffset(t, t->litProgramOffset); + assert(id < t->literalCount); return roseRunProgram(t, scratch, programs[id], end, match_len, 0); } -static never_inline -hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - u32 id) { - size_t match_len = 0; - return roseProcessMatch_i(t, scratch, end, match_len, id, 1, 0); -} - -static never_inline -hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, - u64a end, u32 id) { - size_t match_len = 0; - return roseProcessMatch_i(t, scratch, end, match_len, id, 0, 1); -} - -static really_inline -hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - size_t match_len, u32 id) { - return roseProcessMatch_i(t, scratch, end, match_len, id, 0, 0); -} - static rose_inline hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct hs_scratch *scratch, @@ -472,8 +331,7 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, UNUSED rose_group old_groups = tctxt->groups; DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", literal_id, offset); - hwlmcb_rv_t rv = - roseProcessDelayedMatch(t, scratch, offset, literal_id); + hwlmcb_rv_t rv = roseProcessMatch(t, scratch, offset, 0, literal_id); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* delayed literals can't safely set groups. @@ -507,8 +365,7 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, rose_group old_groups = tctxt->groups; DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", literal_id, curr_loc); - hwlmcb_rv_t rv = - roseProcessDelayedAnchoredMatch(t, scratch, curr_loc, literal_id); + hwlmcb_rv_t rv = roseProcessMatch(t, scratch, curr_loc, 0, literal_id); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* anchored literals can't safely set groups. @@ -707,7 +564,7 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { } size_t match_len = end - start + 1; - rv = roseProcessMainMatch(t, scratch, real_end, match_len, id); + rv = roseProcessMatch(t, scratch, real_end, match_len, id); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); @@ -719,3 +576,23 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { DEBUG_PRINTF("user requested halt\n"); return HWLM_TERMINATE_MATCHING; } + +/** + * \brief Match callback adaptor used for matches from pure-literal cases. + * + * Literal match IDs in this path run limited Rose programs that do not use + * Rose state (which is not initialised in the pure-literal path). They can + * still, for example, check lookarounds or literal masks. 
+ */ +hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, + void *context) { + DEBUG_PRINTF("start=%zu, end=%zu, id=%u\n", start, end, id); + struct hs_scratch *scratch = context; + struct core_info *ci = &scratch->core_info; + const u64a real_end = (u64a)end + ci->buf_offset + 1; + const size_t match_len = end - start + 1; + const struct RoseEngine *rose = ci->rose; + const u32 *programs = getByOffset(rose, rose->litProgramOffset); + assert(id < rose->literalCount); + return roseRunProgram(rose, scratch, programs[id], real_end, match_len, 0); +} diff --git a/src/rose/match.h b/src/rose/match.h index 0629d8d7b..f98891395 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -59,27 +59,9 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx); /* Common code, used all over Rose runtime */ -static rose_inline -void resetAnchoredLog(const struct RoseEngine *t, struct hs_scratch *scratch) { - struct fatbit **anchoredRows = getAnchoredLog(scratch); - u32 region_width = t->anchoredMatches; - struct RoseContext *tctxt = &scratch->tctxt; - - tctxt->curr_anchored_loc = bf64_iterate(scratch->am_log_sum, MMB_INVALID); - if (tctxt->curr_anchored_loc != MMB_INVALID) { - assert(tctxt->curr_anchored_loc < scratch->anchored_region_len); - struct fatbit *curr_row = anchoredRows[tctxt->curr_anchored_loc]; - tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width, - MMB_INVALID); - assert(tctxt->curr_row_offset != MMB_INVALID); - } - DEBUG_PRINTF("AL reset --> %u, %u\n", tctxt->curr_anchored_loc, - tctxt->curr_row_offset); -} - hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, struct hs_scratch *scratch, ReportID r, - u64a end, char in_anchored, char in_catchup); + u64a end, char in_catchup); static really_inline void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 3d25d6824..d816d62e1 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -225,8 +225,7 @@ hwlmcb_rv_t roseHaltIfExhausted(const struct RoseEngine *t, static really_inline hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, s64a loc, - char is_mpv, char in_anchored, - char in_catchup) { + char is_mpv, char in_catchup) { struct RoseContext *tctxt = &scratch->tctxt; u8 *aa = getActiveLeafArray(t, scratch->core_info.state); struct fatbit *activeQueues = scratch->aqa; @@ -258,8 +257,8 @@ hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, } } - if (roseCatchUpTo(t, scratch, loc + scratch->core_info.buf_offset, - in_anchored) == HWLM_TERMINATE_MATCHING) { + if (roseCatchUpTo(t, scratch, loc + scratch->core_info.buf_offset) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } else { @@ -286,15 +285,14 @@ hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, static rose_inline hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, s64a loc, - char in_anchored) { - return ensureQueueFlushed_i(t, scratch, qi, loc, 0, in_anchored, 0); + struct hs_scratch *scratch, u32 qi, s64a loc) { + return ensureQueueFlushed_i(t, scratch, qi, loc, 0, 0); } static rose_inline hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, u32 top, - u64a som, u64a end, char in_anchored) { + u64a som, u64a end) { DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); struct core_info *ci = &scratch->core_info; @@ -330,7 +328,7 @@ hwlmcb_rv_t roseTriggerSuffix(const struct 
RoseEngine *t, nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); q->cur = q->end = 0; pushQueueAt(q, 0, MQE_START, loc); - } else if (ensureQueueFlushed(t, scratch, qi, loc, in_anchored) + } else if (ensureQueueFlushed(t, scratch, qi, loc) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -575,18 +573,20 @@ void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch, static rose_inline hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, - ReportID id, u64a end, char is_exhaustible) { - assert(end == scratch->tctxt.minMatchOffset); + u64a end, ReportID id, ReportID onmatch, + s32 offset_adjust, u32 ekey) { + assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end); updateLastMatchOffset(&scratch->tctxt, end); - int cb_rv = roseDeliverReport(end, id, scratch, is_exhaustible); + int cb_rv = roseDeliverReport(end, id, onmatch, offset_adjust, scratch, + ekey); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; } - if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return HWLM_CONTINUE_MATCHING; } @@ -599,14 +599,12 @@ hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, static rose_inline hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, struct hs_scratch *scratch, - ReportID r, u64a end, - char in_anchored) { - if (roseCatchUpMpvFeeders(t, scratch, end, in_anchored) == - HWLM_TERMINATE_MATCHING) { + ReportID r, u64a end) { + if (roseCatchUpMpvFeeders(t, scratch, end) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - return roseHandleChainMatch(t, scratch, r, end, in_anchored, 0); + return roseHandleChainMatch(t, scratch, r, end, 0); } static rose_inline @@ -618,7 +616,7 @@ void roseHandleSom(const struct RoseEngine *t, struct hs_scratch *scratch, // Reach into reports and handle internal reports that just manipulate SOM // slots ourselves, rather than going through the callback. - assert(end == scratch->tctxt.minMatchOffset); + assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end); updateLastMatchOffset(&scratch->tctxt, end); @@ -630,11 +628,12 @@ static rose_inline hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, struct hs_scratch *scratch, ReportID id, u64a start, u64a end, char is_exhaustible) { - assert(end == scratch->tctxt.minMatchOffset); + assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end); updateLastMatchOffset(&scratch->tctxt, end); - int cb_rv = roseDeliverSomReport(start, end, id, scratch, is_exhaustible); + const struct internal_report *ir = getInternalReport(t, id); + int cb_rv = roseDeliverSomReport(start, end, ir, scratch, is_exhaustible); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; @@ -656,7 +655,7 @@ void roseHandleSomSom(const struct RoseEngine *t, ReportID id, u64a start, // Reach into reports and handle internal reports that just manipulate SOM // slots ourselves, rather than going through the callback. 
- assert(end == scratch->tctxt.minMatchOffset); + assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); updateLastMatchOffset(&scratch->tctxt, end); const struct internal_report *ri = getInternalReport(t, id); @@ -967,8 +966,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CATCH_UP) { - if (roseCatchUpTo(t, scratch, end, in_anchored) == - HWLM_TERMINATE_MATCHING) { + if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } @@ -1010,8 +1008,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_CASE(TRIGGER_SUFFIX) { if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som, - end, in_anchored) - == HWLM_TERMINATE_MATCHING) { + end) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1056,8 +1053,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_CHAIN) { - if (roseCatchUpAndHandleChainMatch(t, scratch, ri->report, end, - in_anchored) == + if (roseCatchUpAndHandleChainMatch(t, scratch, ri->report, + end) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -1078,9 +1075,9 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT) { - const char is_exhaustible = 0; - if (roseReport(t, scratch, ri->report, end, is_exhaustible) == - HWLM_TERMINATE_MATCHING) { + if (roseReport(t, scratch, end, ri->report, ri->onmatch, + ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1088,9 +1085,9 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_EXHAUST) { - const char is_exhaustible = 1; - if (roseReport(t, scratch, ri->report, end, is_exhaustible) == - HWLM_TERMINATE_MATCHING) { + if (roseReport(t, scratch, end, ri->report, ri->onmatch, + ri->offset_adjust, + ri->ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1117,6 +1114,33 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(DEDUPE_AND_REPORT) { + const struct internal_report *ir = + getInternalReport(t, ri->report); + const char do_som = t->hasSom; // FIXME: constant propagate + enum DedupeResult rv = dedupeCatchup( + t, ir, scratch, end, som, end + ir->offsetAdjust, do_som); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + case DEDUPE_CONTINUE: + break; + } + + const u32 ekey = INVALID_EKEY; + if (roseReport(t, scratch, end, ri->report, ir->onmatch, + ir->offsetAdjust, + ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_EXHAUSTED) { DEBUG_PRINTF("check ekey %u\n", ri->ekey); assert(ri->ekey != INVALID_EKEY); diff --git a/src/rose/rose.h b/src/rose/rose.h index 95750363c..5dc57bc2e 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -85,6 +85,10 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, assert(scratch); assert(scratch->core_info.buf); + // We should not have been called if we've already been told to terminate + // matching. + assert(!told_to_stop_matching(scratch)); + // If this block is shorter than our minimum width, then no pattern in this // RoseEngine could match. 
/* minWidth checks should have already been performed by the caller */ @@ -124,4 +128,7 @@ void roseEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback); +hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, + void *context); + #endif // ROSE_H diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 57faa46c8..805dc920e 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -231,10 +231,6 @@ u32 anchoredStateSize(const anchored_matcher_info &atable) { return curr->state_offset + nfa->streamStateSize; } -bool anchoredIsMulti(const anchored_matcher_info &atable) { - return atable.next_offset; -} - namespace { typedef bitfield nfa_state_set; @@ -742,21 +738,24 @@ void buildSimpleDfas(const RoseBuildImpl &tbi, * from RoseBuildImpl. */ static -void getAnchoredDfas(RoseBuildImpl &tbi, - vector> *anchored_dfas) { +vector> getAnchoredDfas(RoseBuildImpl &build) { + vector> dfas; + // DFAs that already exist as raw_dfas. - for (auto &anch_dfas : tbi.anchored_nfas) { + for (auto &anch_dfas : build.anchored_nfas) { for (auto &rdfa : anch_dfas.second) { - anchored_dfas->push_back(move(rdfa)); + dfas.push_back(move(rdfa)); } } - tbi.anchored_nfas.clear(); + build.anchored_nfas.clear(); // DFAs we currently have as simple literals. - if (!tbi.anchored_simple.empty()) { - buildSimpleDfas(tbi, anchored_dfas); - tbi.anchored_simple.clear(); + if (!build.anchored_simple.empty()) { + buildSimpleDfas(build, &dfas); + build.anchored_simple.clear(); } + + return dfas; } /** @@ -770,9 +769,9 @@ void getAnchoredDfas(RoseBuildImpl &tbi, * \return Total bytes required for the complete anchored matcher. */ static -size_t buildNfas(vector> &anchored_dfas, - vector> *nfas, vector *start_offset, - const CompileContext &cc) { +size_t buildNfas(vector &anchored_dfas, + vector> *nfas, + vector *start_offset, const CompileContext &cc) { const size_t num_dfas = anchored_dfas.size(); nfas->reserve(num_dfas); @@ -781,12 +780,12 @@ size_t buildNfas(vector> &anchored_dfas, size_t total_size = 0; for (auto &rdfa : anchored_dfas) { - u32 removed_dots = remove_leading_dots(*rdfa); + u32 removed_dots = remove_leading_dots(rdfa); start_offset->push_back(removed_dots); - minimize_hopcroft(*rdfa, cc.grey); + minimize_hopcroft(rdfa, cc.grey); - aligned_unique_ptr nfa = mcclellanCompile(*rdfa, cc); + auto nfa = mcclellanCompile(rdfa, cc); if (!nfa) { assert(0); throw std::bad_alloc(); @@ -803,25 +802,41 @@ size_t buildNfas(vector> &anchored_dfas, return total_size; } +vector buildAnchoredDfas(RoseBuildImpl &build) { + vector dfas; + + if (build.anchored_nfas.empty() && build.anchored_simple.empty()) { + DEBUG_PRINTF("empty\n"); + return dfas; + } + + remapAnchoredReports(build); + + auto anch_dfas = getAnchoredDfas(build); + mergeAnchoredDfas(anch_dfas, build); + + dfas.reserve(anch_dfas.size()); + for (auto &rdfa : anch_dfas) { + assert(rdfa); + dfas.push_back(move(*rdfa)); + } + return dfas; +} + aligned_unique_ptr -buildAnchoredAutomataMatcher(RoseBuildImpl &build, size_t *asize) { +buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, + size_t *asize) { const CompileContext &cc = build.cc; - remapAnchoredReports(build); - if (build.anchored_nfas.empty() && build.anchored_simple.empty()) { + if (dfas.empty()) { DEBUG_PRINTF("empty\n"); *asize = 0; return nullptr; } - vector> anchored_dfas; - getAnchoredDfas(build, &anchored_dfas); - - mergeAnchoredDfas(anchored_dfas, build); - 
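
The rose_build_anchored.cpp refactor above splits anchored-matcher construction in two: buildAnchoredDfas() produces the raw_dfa list (so callers can merge or count the DFAs first), and buildAnchoredMatcher() then serialises the chosen DFAs into the runtime structure. The same two-phase shape in a stripped-down C sketch with toy types — the real phase 2 compiles each DFA with mcclellanCompile() and packs NFA structures end to end:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct toy_dfa { unsigned states; };

    /* Phase 1: produce the set of DFAs to build. */
    static unsigned build_dfas(struct toy_dfa *out, unsigned cap) {
        unsigned n = 0;
        if (n < cap) out[n++].states = 4;
        if (n < cap) out[n++].states = 7;
        return n;
    }

    /* Phase 2: serialise the DFAs into one contiguous matcher blob. Here
     * each "compiled DFA" is just its state count, used as a byte size. */
    static unsigned char *build_matcher(const struct toy_dfa *dfas,
                                        unsigned n, size_t *size_out) {
        size_t total = 0;
        for (unsigned i = 0; i < n; i++) {
            total += dfas[i].states; /* stand-in for compiled NFA size */
        }
        unsigned char *blob = malloc(total ? total : 1);
        if (!blob) return NULL;
        memset(blob, 0, total ? total : 1);
        *size_out = total;
        return blob;
    }

    int main(void) {
        struct toy_dfa dfas[4];
        unsigned n = build_dfas(dfas, 4);
        size_t size = 0;
        unsigned char *matcher = build_matcher(dfas, n, &size);
        printf("built %u dfas into %zu bytes\n", n, size);
        free(matcher);
        return 0;
    }
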
vector> nfas; vector start_offset; // start offset for each dfa (dots removed) - size_t total_size = buildNfas(anchored_dfas, &nfas, &start_offset, cc); + size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc); if (total_size > cc.grey.limitRoseAnchoredSize) { throw ResourceLimitError(); diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index d399907b0..a5317f894 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -46,17 +46,22 @@ namespace ue2 { class NGHolder; class RoseBuildImpl; struct Grey; +struct raw_dfa; -aligned_unique_ptr -buildAnchoredAutomataMatcher(RoseBuildImpl &build, size_t *asize); - -u32 anchoredStateSize(const anchored_matcher_info &atable); +/** + * \brief Construct a set of anchored DFAs from our anchored literals/engines. + */ +std::vector buildAnchoredDfas(RoseBuildImpl &build); /** - * \brief True if there is an anchored matcher and it consists of multiple - * DFAs. + * \brief Construct an anchored_matcher_info runtime structure from the given + * set of DFAs. */ -bool anchoredIsMulti(const anchored_matcher_info &atable); +aligned_unique_ptr +buildAnchoredMatcher(RoseBuildImpl &build, std::vector &dfas, + size_t *asize); + +u32 anchoredStateSize(const anchored_matcher_info &atable); #define ANCHORED_FAIL 0 #define ANCHORED_SUCCESS 1 diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a6ab726df..401c15c15 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -214,6 +214,7 @@ class RoseInstruction { case ROSE_INSTR_REPORT_EXHAUST: return &u.reportExhaust; case ROSE_INSTR_REPORT_SOM: return &u.reportSom; case ROSE_INSTR_REPORT_SOM_EXHAUST: return &u.reportSomExhaust; + case ROSE_INSTR_DEDUPE_AND_REPORT: return &u.dedupeAndReport; case ROSE_INSTR_CHECK_EXHAUSTED: return &u.checkExhausted; case ROSE_INSTR_CHECK_MIN_LENGTH: return &u.checkMinLength; case ROSE_INSTR_SET_STATE: return &u.setState; @@ -257,6 +258,7 @@ class RoseInstruction { case ROSE_INSTR_REPORT_EXHAUST: return sizeof(u.reportExhaust); case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); case ROSE_INSTR_REPORT_SOM_EXHAUST: return sizeof(u.reportSomExhaust); + case ROSE_INSTR_DEDUPE_AND_REPORT: return sizeof(u.dedupeAndReport); case ROSE_INSTR_CHECK_EXHAUSTED: return sizeof(u.checkExhausted); case ROSE_INSTR_CHECK_MIN_LENGTH: return sizeof(u.checkMinLength); case ROSE_INSTR_SET_STATE: return sizeof(u.setState); @@ -299,6 +301,7 @@ class RoseInstruction { ROSE_STRUCT_REPORT_EXHAUST reportExhaust; ROSE_STRUCT_REPORT_SOM reportSom; ROSE_STRUCT_REPORT_SOM_EXHAUST reportSomExhaust; + ROSE_STRUCT_DEDUPE_AND_REPORT dedupeAndReport; ROSE_STRUCT_CHECK_EXHAUSTED checkExhausted; ROSE_STRUCT_CHECK_MIN_LENGTH checkMinLength; ROSE_STRUCT_SET_STATE setState; @@ -326,6 +329,25 @@ size_t hash_value(const RoseInstruction &ri) { return val; } +/** + * \brief Structure tracking which resources are used by this Rose instance at + * runtime. + * + * We use this to control how much initialisation we need to do at the + * beginning of a stream/block at runtime. 
+ */
+struct RoseResources {
+    bool has_outfixes = false;
+    bool has_suffixes = false;
+    bool has_leftfixes = false;
+    bool has_literals = false;
+    bool has_states = false;
+    bool checks_groups = false;
+    bool has_lit_delay = false;
+    bool has_lit_mask = false;
+    bool has_anchored = false;
+};
+
 struct build_context : boost::noncopyable {
     /** \brief information about engines to the left of a vertex */
     map leftfix_info;
@@ -373,6 +395,13 @@ struct build_context : boost::noncopyable {
      * RoseEngine. */
     vector> engine_blob;
 
+    /** \brief True if reports need CATCH_UP instructions, to catch up anchored
+     * matches, suffixes, outfixes etc. */
+    bool needs_catchup = false;
+
+    /** \brief Resources in use (tracked as programs are added). */
+    RoseResources resources;
+
     /** \brief Base offset of engine_blob in the Rose engine bytecode. */
     static constexpr u32 engine_blob_base = ROUNDUP_CL(sizeof(RoseEngine));
 };
@@ -477,42 +506,74 @@ u32 countRosePrefixes(const vector &roses) {
     return num;
 }
 
+/**
+ * \brief True if this Rose engine needs to run a catch up whenever a report is
+ * generated.
+ *
+ * Catch up is necessary if there are output-exposed engines (suffixes,
+ * outfixes) or an anchored table (anchored literals, etc).
+ */
 static
-bool isPureFloating(const RoseBuildImpl &tbi) {
-    if (!tbi.outfixes.empty()) {
+bool needsCatchup(const RoseBuildImpl &build) {
+    if (!build.outfixes.empty()) {
         DEBUG_PRINTF("has outfixes\n");
-        return false;
+        return true;
     }
 
-    const RoseGraph &g = tbi.g;
+    const RoseGraph &g = build.g;
 
-    if (!isLeafNode(tbi.anchored_root, g)) {
+    if (!isLeafNode(build.anchored_root, g)) {
         DEBUG_PRINTF("has anchored vertices\n");
-        return false;
+        return true;
     }
 
     for (auto v : vertices_range(g)) {
-        if (tbi.root == v) {
+        if (build.root == v) {
             continue;
        }

-        if (tbi.anchored_root == v) {
+        if (build.anchored_root == v) {
             assert(isLeafNode(v, g));
             continue;
        }

-        if (!tbi.allDirectFinalIds(v) || !tbi.isFloating(v)) {
-            DEBUG_PRINTF("vertex %zu isn't floating and direct\n", g[v].idx);
-            return false;
+        if (g[v].suffix) {
+            DEBUG_PRINTF("vertex %zu has suffix\n", g[v].idx);
+            return true;
        }

-        for (ReportID r : g[v].reports) {
-            const Report &ri = tbi.rm.getReport(r);
-            if (!isExternalReport(ri)) {
-                DEBUG_PRINTF("vertex %zu has non-external report\n", g[v].idx);
-                return false;
-            }
-        }
+    }
+
+    DEBUG_PRINTF("no need for catch-up on report\n");
+    return false;
+}
+
+static
+bool isPureFloating(const RoseResources &resources) {
+    if (resources.has_outfixes || resources.has_suffixes ||
+        resources.has_leftfixes) {
+        DEBUG_PRINTF("has engines\n");
+        return false;
+    }
+
+    if (resources.has_anchored) {
+        DEBUG_PRINTF("has anchored matcher\n");
+        return false;
+    }
+
+    if (resources.has_states) {
+        DEBUG_PRINTF("has states\n");
+        return false;
+    }
+
+    if (resources.has_lit_delay) {
+        DEBUG_PRINTF("has delayed literals\n");
+        return false;
+    }
+
+    if (resources.checks_groups) {
+        DEBUG_PRINTF("has group checks\n");
+        return false;
    }

    DEBUG_PRINTF("pure floating literals\n");
@@ -544,12 +605,23 @@ bool isSingleOutfix(const RoseBuildImpl &tbi, u32 outfixEndQueue) {
 }
 
 static
-u8 pickRuntimeImpl(const RoseBuildImpl &tbi, u32 outfixEndQueue) {
-    if (isPureFloating(tbi)) {
+u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc,
+                   u32 outfixEndQueue) {
+    DEBUG_PRINTF("has_outfixes=%d\n", bc.resources.has_outfixes);
+    DEBUG_PRINTF("has_suffixes=%d\n", bc.resources.has_suffixes);
+    DEBUG_PRINTF("has_leftfixes=%d\n", bc.resources.has_leftfixes);
+    DEBUG_PRINTF("has_literals=%d\n", bc.resources.has_literals);
+
DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states); + DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups); + DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); + DEBUG_PRINTF("has_lit_mask=%d\n", bc.resources.has_lit_mask); + DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); + + if (isPureFloating(bc.resources)) { return ROSE_RUNTIME_PURE_LITERAL; } - if (isSingleOutfix(tbi, outfixEndQueue)) { + if (isSingleOutfix(build, outfixEndQueue)) { return ROSE_RUNTIME_SINGLE_OUTFIX; } @@ -1880,31 +1952,27 @@ bool findHamsterMask(const RoseBuildImpl &tbi, const rose_literal_id &id, } static -bool isDirectHighlander(const RoseBuildImpl &tbi, +bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, const rose_literal_info &info) { - u32 final_id = info.final_id; - assert(final_id != MO_INVALID_IDX); - - if ((final_id & LITERAL_MDR_FLAG) == LITERAL_MDR_FLAG) { - u32 i = final_id & ~LITERAL_MDR_FLAG; - assert(i < tbi.mdr_reports.size()); - for (ReportID report = tbi.mdr_reports[i]; report != MO_INVALID_IDX; - report = tbi.mdr_reports[++i]) { - const Report &ir = tbi.rm.getReport(report); - if (!isSimpleExhaustible(ir)) { - return false; - } - } - return true; - } else if (final_id & LITERAL_DR_FLAG) { - ReportID report = final_id & ~LITERAL_DR_FLAG; - const Report &ir = tbi.rm.getReport(report); - if (isSimpleExhaustible(ir)) { - return true; - } + if (!build.isDirectReport(id)) { + return false; } - return false; + auto is_simple_exhaustible = [&build](ReportID id) { + const Report &report = build.rm.getReport(id); + return isSimpleExhaustible(report); + }; + + assert(!info.vertices.empty()); + for (const auto &v : info.vertices) { + const auto &reports = build.g[v].reports; + assert(!reports.empty()); + if (!all_of(begin(reports), end(reports), + is_simple_exhaustible)) { + return false; + } + } + return true; } // Called by isNoRunsLiteral below. @@ -1958,7 +2026,7 @@ bool isNoRunsVertex(const RoseBuildImpl &tbi, NFAVertex u) { } static -bool isNoRunsLiteral(const RoseBuildImpl &tbi, UNUSED const u32 id, +bool isNoRunsLiteral(const RoseBuildImpl &tbi, const u32 id, const rose_literal_info &info) { DEBUG_PRINTF("lit id %u\n", id); @@ -1967,7 +2035,7 @@ bool isNoRunsLiteral(const RoseBuildImpl &tbi, UNUSED const u32 id, return false; } - if (isDirectHighlander(tbi, info)) { + if (isDirectHighlander(tbi, id, info)) { DEBUG_PRINTF("highlander direct report\n"); return true; } @@ -2293,8 +2361,8 @@ void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &gre static u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, - const anchored_matcher_info *atable) { - if (atable && anchoredIsMulti(*atable)) { + const vector &anchored_dfas) { + if (anchored_dfas.size() > 1) { DEBUG_PRINTF("multiple anchored dfas\n"); /* We must regard matches from other anchored tables as unordered, as * we do for floating matches. 
*/ @@ -2739,6 +2807,9 @@ flattenProgram(const vector> &programs) { case ROSE_INSTR_DEDUPE_SOM: ri.u.dedupeSom.fail_jump = jump_val; break; + case ROSE_INSTR_DEDUPE_AND_REPORT: + ri.u.dedupeAndReport.fail_jump = jump_val; + break; case ROSE_INSTR_CHECK_EXHAUSTED: ri.u.checkExhausted.fail_jump = jump_val; break; @@ -2765,6 +2836,55 @@ flattenProgram(const vector> &programs) { return out; } +static +void recordResources(RoseResources &resources, + const vector &program) { + for (const auto &ri : program) { + switch (ri.code()) { + case ROSE_INSTR_TRIGGER_SUFFIX: + resources.has_suffixes = true; + break; + case ROSE_INSTR_TRIGGER_INFIX: + case ROSE_INSTR_CHECK_INFIX: + case ROSE_INSTR_CHECK_PREFIX: + case ROSE_INSTR_SOM_LEFTFIX: + resources.has_leftfixes = true; + break; + case ROSE_INSTR_SET_STATE: + case ROSE_INSTR_CHECK_STATE: + case ROSE_INSTR_SPARSE_ITER_BEGIN: + case ROSE_INSTR_SPARSE_ITER_NEXT: + resources.has_states = true; + break; + case ROSE_INSTR_CHECK_GROUPS: + resources.checks_groups = true; + break; + case ROSE_INSTR_PUSH_DELAYED: + resources.has_lit_delay = true; + break; + case ROSE_INSTR_CHECK_LIT_MASK: + resources.has_lit_mask = true; + break; + default: + break; + } + } +} + +static +void recordResources(RoseResources &resources, + const RoseBuildImpl &build) { + if (!build.outfixes.empty()) { + resources.has_outfixes = true; + } + for (u32 i = 0; i < build.literal_info.size(); i++) { + if (build.hasFinalId(i)) { + resources.has_literals = true; + break; + } + } +} + static u32 writeProgram(build_context &bc, const vector &program) { if (program.empty()) { @@ -2788,6 +2908,8 @@ u32 writeProgram(build_context &bc, const vector &program) { return it->second; } + recordResources(bc.resources, program); + DEBUG_PRINTF("writing %zu instructions\n", program.size()); u32 programOffset = 0; for (const auto &ri : program) { @@ -3045,14 +3167,14 @@ void makeDedupeSom(const ReportID id, vector &report_block) { } static -void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, - vector &program) { +void makeReport(RoseBuildImpl &build, build_context &bc, const ReportID id, + const bool has_som, vector &program) { assert(id < build.rm.numReports()); const Report &report = build.rm.getReport(id); vector report_block; - // Similarly, we can handle min/max offset checks. + // Handle min/max offset checks. if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, JumpTarget::NEXT_BLOCK); @@ -3064,7 +3186,7 @@ void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, // Catch up -- everything except the INTERNAL_ROSE_CHAIN report needs this. // TODO: this could be floated in front of all the reports and only done // once. - if (report.type != INTERNAL_ROSE_CHAIN) { + if (bc.needs_catchup && report.type != INTERNAL_ROSE_CHAIN) { report_block.emplace_back(ROSE_INSTR_CATCH_UP); } @@ -3103,15 +3225,29 @@ void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, if (!has_som) { // Dedupe is only necessary if this report has a dkey, or if there // are SOM reports to catch up. 
- if (build.rm.getDkey(report) != ~0U || build.hasSom) { - makeDedupe(id, report_block); - } + bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT); - report_block.back().u.report.report = id; + if (needs_dedupe) { + report_block.emplace_back(ROSE_INSTR_DEDUPE_AND_REPORT, + JumpTarget::NEXT_BLOCK); + report_block.back().u.dedupeAndReport.report = id; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT); + auto &ri = report_block.back(); + ri.u.report.report = id; + ri.u.report.onmatch = report.onmatch; + ri.u.report.offset_adjust = report.offsetAdjust; + } } else { + if (needs_dedupe) { + makeDedupe(id, report_block); + } report_block.emplace_back(ROSE_INSTR_REPORT_EXHAUST); - report_block.back().u.reportExhaust.report = id; + auto &ri = report_block.back(); + ri.u.reportExhaust.report = id; + ri.u.reportExhaust.onmatch = report.onmatch; + ri.u.reportExhaust.offset_adjust = report.offsetAdjust; + ri.u.reportExhaust.ekey = report.ekey; } } else { // has_som makeDedupeSom(id, report_block); @@ -3196,7 +3332,7 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, } for (ReportID id : g[v].reports) { - makeReport(build, id, has_som, program); + makeReport(build, bc, id, has_som, program); } } @@ -3869,6 +4005,20 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, program.push_back(RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY)); } +static +bool hasDelayedLiteral(RoseBuildImpl &build, + const vector &lit_edges) { + auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1); + for (const auto &e : lit_edges) { + auto v = target(e, build.g); + const auto &lits = build.g[v].literals; + if (any_of(begin(lits), end(lits), is_delayed)) { + return true; + } + } + return false; +} + static vector buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, @@ -3885,8 +4035,12 @@ vector buildLitInitialProgram(RoseBuildImpl &build, // Check lit mask. makeCheckLitMaskInstruction(build, final_id, pre_program); - // Check literal groups. - makeGroupCheckInstruction(build, final_id, pre_program); + // Check literal groups. This is an optimisation that we only perform for + // delayed literals, as their groups may be switched off; ordinarily, we + // can trust the HWLM matcher. + if (hasDelayedLiteral(build, lit_edges)) { + makeGroupCheckInstruction(build, final_id, pre_program); + } // Add instructions for pushing delayed matches, if there are any. makePushDelayedInstructions(build, final_id, pre_program); @@ -3982,8 +4136,8 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { for (const auto &lit_id : g[v].literals) { assert(lit_id < build.literal_info.size()); u32 final_id = build.literal_info.at(lit_id).final_id; - if (final_id == MO_INVALID_IDX || final_id & LITERAL_MDR_FLAG) { - // Unused, special or direct report IDs are handled elsewhere. + if (final_id == MO_INVALID_IDX) { + // Unused, special report IDs are handled elsewhere. 
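+                // (All surviving final_ids correspond to real literal
+                // programs; the direct-report ID encodings are gone.)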
continue; } unique_lit_edge_map[final_id].insert(e); @@ -4054,8 +4208,9 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, makeRoleCheckNotHandled(bc, v, program); } + const bool has_som = false; for (const auto &id : g[v].reports) { - makeReport(build, id, false, program); + makeReport(build, bc, id, has_som, program); } return program; @@ -4135,34 +4290,6 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) { return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list); } -static -void calcAnchoredMatches(const RoseBuildImpl &build, vector &art, - vector &arit) { - const RoseGraph &g = build.g; - - u32 max_report = 0; - - for (RoseVertex v : vertices_range(g)) { - if (!build.isAnchored(v)) { - continue; - } - - for (ReportID r : g[v].reports) { - art.push_back(r); - max_report = max(max_report, r); - } - } - - assert(max_report < MO_INVALID_IDX); - - arit.resize(max_report + 1, MO_INVALID_IDX); - for (u32 i = 0; i < art.size(); i++) { - DEBUG_PRINTF("art[%u] = %u\n", i, art[i]); - arit[art[i]] = i; - DEBUG_PRINTF("arit[%u] = %u\n", art[i], arit[art[i]]); - } -} - static u32 history_required(const rose_literal_id &key) { if (key.msk.size() < key.s.length()) { @@ -4264,22 +4391,18 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { DerivedBoundaryReports dboundary(boundary); - // Build literal matchers - size_t asize = 0, fsize = 0, esize = 0, sbsize = 0; - - size_t floatingStreamStateRequired = 0; size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. - aligned_unique_ptr atable = - buildAnchoredAutomataMatcher(*this, &asize); - aligned_unique_ptr ftable = buildFloatingMatcher( - *this, &fsize, &historyRequired, &floatingStreamStateRequired); - aligned_unique_ptr etable = buildEodAnchoredMatcher(*this, &esize); - aligned_unique_ptr sbtable = buildSmallBlockMatcher(*this, &sbsize); + auto anchored_dfas = buildAnchoredDfas(*this); build_context bc; bc.floatingMinLiteralMatchOffset = - findMinFloatingLiteralMatch(*this, atable.get()); + findMinFloatingLiteralMatch(*this, anchored_dfas); + bc.needs_catchup = needsCatchup(*this); + recordResources(bc.resources, *this); + if (!anchored_dfas.empty()) { + bc.resources.has_anchored = true; + } // Build NFAs set no_retrigger_queues; @@ -4336,11 +4459,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - u32 amatcherOffset = 0; - u32 fmatcherOffset = 0; - u32 ematcherOffset = 0; - u32 sbmatcherOffset = 0; - u32 currOffset; /* relative to base of RoseEngine */ if (!bc.engine_blob.empty()) { currOffset = bc.engine_blob_base + byte_length(bc.engine_blob); @@ -4354,28 +4472,46 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset = ROUNDUP_CL(currOffset); DEBUG_PRINTF("currOffset %u\n", currOffset); + // Build anchored matcher. + size_t asize = 0; + u32 amatcherOffset = 0; + auto atable = buildAnchoredMatcher(*this, anchored_dfas, &asize); if (atable) { currOffset = ROUNDUP_CL(currOffset); amatcherOffset = currOffset; - currOffset += (u32)asize; + currOffset += verify_u32(asize); } + // Build floating HWLM matcher. 
+ size_t fsize = 0; + size_t floatingStreamStateRequired = 0; + auto ftable = buildFloatingMatcher(*this, &fsize, &historyRequired, + &floatingStreamStateRequired); + u32 fmatcherOffset = 0; if (ftable) { currOffset = ROUNDUP_CL(currOffset); fmatcherOffset = currOffset; - currOffset += (u32)fsize; + currOffset += verify_u32(fsize); } + // Build EOD-anchored HWLM matcher. + size_t esize = 0; + auto etable = buildEodAnchoredMatcher(*this, &esize); + u32 ematcherOffset = 0; if (etable) { currOffset = ROUNDUP_CL(currOffset); ematcherOffset = currOffset; - currOffset += (u32)esize; + currOffset += verify_u32(esize); } + // Build small-block HWLM matcher. + size_t sbsize = 0; + auto sbtable = buildSmallBlockMatcher(*this, &sbsize); + u32 sbmatcherOffset = 0; if (sbtable) { currOffset = ROUNDUP_CL(currOffset); sbmatcherOffset = currOffset; - currOffset += (u32)sbsize; + currOffset += verify_u32(sbsize); } const vector &int_reports = rm.reports(); @@ -4400,22 +4536,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 nfaInfoLen = sizeof(NfaInfo) * queue_count; currOffset = nfaInfoOffset + nfaInfoLen; - vector art; // Reports raised by anchored roles - vector arit; // inverse reportID -> position in art - calcAnchoredMatches(*this, art, arit); - - currOffset = ROUNDUP_N(currOffset, sizeof(ReportID)); - u32 anchoredReportMapOffset = currOffset; - currOffset += art.size() * sizeof(ReportID); - - currOffset = ROUNDUP_N(currOffset, sizeof(u32)); - u32 anchoredReportInverseMapOffset = currOffset; - currOffset += arit.size() * sizeof(u32); - - currOffset = ROUNDUP_N(currOffset, alignof(ReportID)); - u32 multidirectOffset = currOffset; - currOffset += mdr_reports.size() * sizeof(ReportID); - currOffset = ROUNDUP_N(currOffset, alignof(mmbit_sparse_iter)); u32 activeLeftIterOffset = currOffset; currOffset += activeLeftIter.size() * sizeof(mmbit_sparse_iter); @@ -4502,13 +4622,14 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->somLocationCount = ssm.numSomSlots(); engine->simpleCallback = !rm.numEkeys() && hasSimpleReports(rm.reports()); + engine->needsCatchup = bc.needs_catchup ? 1 : 0; fillInReportInfo(engine.get(), intReportOffset, rm, int_reports); engine->literalCount = verify_u32(final_id_to_literal.size()); engine->litProgramOffset = litProgramOffset; engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset; - engine->runtimeImpl = pickRuntimeImpl(*this, outfixEndQueue); + engine->runtimeImpl = pickRuntimeImpl(*this, bc, outfixEndQueue); engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); engine->activeArrayCount = activeArrayCount; @@ -4531,10 +4652,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->stateSize = mmbit_size(bc.numStates); engine->anchorStateSize = anchorStateSize; engine->nfaInfoOffset = nfaInfoOffset; - engine->anchoredReportMapOffset = anchoredReportMapOffset; - engine->anchoredReportInverseMapOffset - = anchoredReportInverseMapOffset; - engine->multidirectOffset = multidirectOffset; engine->eodProgramOffset = eodProgramOffset; engine->eodIterProgramOffset = eodIterProgramOffset; @@ -4580,17 +4697,14 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->size = currOffset; engine->minWidth = hasBoundaryReports(boundary) ? 
0 : minWidth; engine->minWidthExcludingBoundaries = minWidth; - engine->maxSafeAnchoredDROffset = findMinWidth(*this, ROSE_FLOATING); engine->floatingMinLiteralMatchOffset = bc.floatingMinLiteralMatchOffset; engine->maxBiAnchoredWidth = findMaxBAWidth(*this); engine->noFloatingRoots = hasNoFloatingRoots(); - engine->hasFloatingDirectReports = floating_direct_report; engine->requiresEodCheck = hasEodAnchors(*this, bc, outfixEndQueue); engine->hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); engine->canExhaust = rm.patternSetCanExhaust(); engine->hasSom = hasSom; - engine->anchoredMatches = verify_u32(art.size()); /* populate anchoredDistance, floatingDistance, floatingMinDistance, etc */ fillMatcherDistances(*this, engine.get()); @@ -4605,15 +4719,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); - if (atable && anchoredIsMulti(*atable)) { - engine->maxSafeAnchoredDROffset = 1; - } else { - /* overly conservative, really need the min offset of non dr anchored - matches */ - engine->maxSafeAnchoredDROffset = MIN(engine->maxSafeAnchoredDROffset, - engine->floatingMinLiteralMatchOffset); - } - NfaInfo *nfa_infos = (NfaInfo *)(ptr + nfaInfoOffset); populateNfaInfoBasics(*this, bc, outfixes, suffixEkeyLists, no_retrigger_queues, nfa_infos); @@ -4629,9 +4734,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { ptr + lookaroundReachOffset, bc.lookaround); fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); - copy_bytes(ptr + engine->anchoredReportMapOffset, art); - copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit); - copy_bytes(ptr + engine->multidirectOffset, mdr_reports); copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); // Safety check: we shouldn't have written anything to the engine blob diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index db1e49eeb..477335caf 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -247,51 +247,12 @@ bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { return false; } -static -void makeDirectReport(RoseBuildImpl &build, u32 i) { - if (build.literals.right.at(i).table == ROSE_FLOATING) { - build.floating_direct_report = true; - } - - rose_literal_info &info = build.literal_info[i]; - assert(!info.vertices.empty()); - - vector reports; - for (const auto &v : info.vertices) { - const auto &r = build.g[v].reports; - reports.insert(end(reports), begin(r), end(r)); - } - sort(begin(reports), end(reports)); - reports.erase(unique(begin(reports), end(reports)), end(reports)); - - if (reports.size() == 1) { - // A single direct report. We set the high bit to indicate it's a - // direct report and encode the ReportID itself in the final_id - // field. - ReportID report = reports.front(); - assert(!(report & LITERAL_DR_FLAG)); - info.final_id = LITERAL_DR_FLAG | report; - DEBUG_PRINTF("direct report %u -> %u\n", info.final_id, report); - } else { - // A multi-direct report. Here we write the report set into a list - // to be triggered when we see this literal. 
- u32 mdr_index = verify_u32(build.mdr_reports.size()); - info.final_id = LITERAL_MDR_FLAG | mdr_index; - DEBUG_PRINTF("multi direct report %u -> [%s]\n", info.final_id, - as_string_list(reports).c_str()); - build.mdr_reports.insert(end(build.mdr_reports), begin(reports), - end(reports)); - build.mdr_reports.push_back(MO_INVALID_IDX); - } -} - +/** \brief Allocate final literal IDs for all literals. + * + * These are the literal ids used in the bytecode. + */ static void allocateFinalLiteralId(RoseBuildImpl &tbi) { - /* allocate final literal ids - these are the literal ids used in the - * bytecode. - * DRs already have special final ids allocated - */ - RoseGraph &g = tbi.g; set anch; @@ -309,11 +270,6 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { continue; } - if (tbi.isDirectReport(i)) { - makeDirectReport(tbi, i); - continue; - } - // The special EOD event literal has its own program and does not need // a real literal ID. if (i == tbi.eod_event_literal_id) { @@ -902,23 +858,6 @@ bool RoseBuildImpl::isDelayed(u32 id) const { return literal_info.at(id).undelayed_id != id; } -bool RoseBuildImpl::hasDirectFinalId(u32 id) const { - return literal_info.at(id).final_id & LITERAL_MDR_FLAG; -} - -bool RoseBuildImpl::allDirectFinalIds(RoseVertex v) const { - const auto &lits = g[v].literals; - if (lits.empty()) { - return false; - } - for (const auto &lit : lits) { - if (!hasDirectFinalId(lit)) { - return false; - } - } - return true; -} - bool RoseBuildImpl::hasFinalId(u32 id) const { return literal_info.at(id).final_id != MO_INVALID_IDX; } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index e73d81c3f..cd32749eb 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -146,15 +146,6 @@ class RoseGraphWriter { os << ")"; } - if (!g[v].literals.empty()) { - u32 id = *g[v].literals.begin(); - if (id < build.literal_info.size() - && build.literal_info[id].final_id != MO_INVALID_IDX - && (build.literal_info[id].final_id & LITERAL_DR_FLAG)) { - os << "\\nDIRECT REPORT"; - } - } - if (ghost.find(v) != ghost.end()) { os << "\\nGHOST"; } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 1a5e77aa3..592476459 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -415,7 +415,6 @@ class RoseBuildImpl : public RoseBuild { // Is the Rose anchored? bool hasNoFloatingRoots() const; - bool hasDirectReports() const; RoseVertex cloneVertex(RoseVertex v); @@ -441,17 +440,6 @@ class RoseBuildImpl : public RoseBuild { bool isDirectReport(u32 id) const; bool isDelayed(u32 id) const; - /** - * \brief True if the given literal ID is a direct or multi-direct report. - */ - bool hasDirectFinalId(u32 id) const; - - /** - * \brief True if all the literals associated with the given vertex are - * direct or multi-direct reports. - */ - bool allDirectFinalIds(RoseVertex v) const; - bool hasFinalId(u32 id) const; bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored @@ -526,16 +514,10 @@ class RoseBuildImpl : public RoseBuild { * null again). */ std::unique_ptr mpv_outfix = nullptr; - bool floating_direct_report; - u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX. u32 max_rose_anchored_floating_overlap; - /** \brief Flattened list of report IDs for multi-direct reports, indexed - * by MDR final_id. 
*/ - std::vector mdr_reports; - QueueIndexFactory qif; ReportManager &rm; SomSlotManager &ssm; diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 1df1b2d99..e5c5b4e62 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -79,7 +79,6 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, group_end(0), anchored_base_id(MO_INVALID_IDX), ematcher_region_size(0), - floating_direct_report(false), eod_event_literal_id(MO_INVALID_IDX), max_rose_anchored_floating_overlap(0), rm(rm_in), diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 4eaab3264..89dce9810 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -403,6 +403,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(DEDUPE_AND_REPORT) { + os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_EXHAUSTED) { os << " ekey " << ri->ekey << endl; os << " fail_jump " << offset + ri->fail_jump << endl; @@ -858,8 +865,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) { sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance); fprintf(f, " - role state table : %zu bytes\n", t->rolesWithStateCount * sizeof(u32)); - fprintf(f, " - nfa info table : %u bytes\n", - t->anchoredReportMapOffset - t->nfaInfoOffset); + fprintf(f, " - nfa info table : %zu bytes\n", + t->queueCount * sizeof(NfaInfo)); fprintf(f, " - lookaround table : %u bytes\n", t->nfaInfoOffset - t->lookaroundTableOffset); fprintf(f, " - lookaround reach : %u bytes\n", @@ -898,8 +905,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { t->minWidthExcludingBoundaries); fprintf(f, " maxBiAnchoredWidth : %s\n", rose_off(t->maxBiAnchoredWidth).str().c_str()); - fprintf(f, " maxSafeAnchoredDROffset : %s\n", - rose_off(t->maxSafeAnchoredDROffset).str().c_str()); fprintf(f, " minFloatLitMatchOffset : %s\n", rose_off(t->floatingMinLiteralMatchOffset).str().c_str()); fprintf(f, " delay_base_id : %u\n", t->delay_base_id); @@ -936,7 +941,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { void roseDumpStructRaw(const RoseEngine *t, FILE *f) { fprintf(f, "struct RoseEngine {\n"); - DUMP_U8(t, hasFloatingDirectReports); DUMP_U8(t, noFloatingRoots); DUMP_U8(t, requiresEodCheck); DUMP_U8(t, hasOutfixesInSmallBlock); @@ -946,6 +950,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U8(t, hasSom); DUMP_U8(t, somHorizon); DUMP_U8(t, simpleCallback); + DUMP_U8(t, needsCatchup); DUMP_U32(t, mode); DUMP_U32(t, historyRequired); DUMP_U32(t, ekeyCount); @@ -972,7 +977,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, litProgramOffset); DUMP_U32(t, litDelayRebuildProgramOffset); DUMP_U32(t, literalCount); - DUMP_U32(t, multidirectOffset); DUMP_U32(t, activeArrayCount); DUMP_U32(t, activeLeftCount); DUMP_U32(t, queueCount); @@ -994,14 +998,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, floatingDistance); DUMP_U32(t, floatingMinDistance); DUMP_U32(t, smallBlockDistance); - DUMP_U32(t, maxSafeAnchoredDROffset); DUMP_U32(t, floatingMinLiteralMatchOffset); DUMP_U32(t, nfaInfoOffset); - DUMP_U32(t, anchoredReportMapOffset); - DUMP_U32(t, anchoredReportInverseMapOffset); DUMP_U64(t, initialGroups); DUMP_U32(t, size); - DUMP_U32(t, anchoredMatches); DUMP_U32(t, delay_count); DUMP_U32(t, delay_base_id); DUMP_U32(t, anchored_count); diff --git 
a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 0d6c96e9b..e9edbc154 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -48,29 +48,6 @@ typedef u64a rose_group; #define MAX_DELAY (DELAY_SLOT_COUNT - 1) #define DELAY_MASK (DELAY_SLOT_COUNT - 1) -// Direct report stuff -#define LITERAL_DR_FLAG (1U << 31) -#define LITERAL_MDR_FLAG ((1U << 30) | (1U << 31)) - -/** \brief True if literal is either a direct report or a multi-direct report. - * */ -static really_inline -u32 isLiteralDR(u32 id) { - return id & LITERAL_DR_FLAG; -} - -static really_inline -u32 isLiteralMDR(u32 id) { - return (id & LITERAL_MDR_FLAG) == LITERAL_MDR_FLAG; -} - -static really_inline -ReportID literalToReport(u32 id) { - assert(id & LITERAL_DR_FLAG); - assert(!(id & (LITERAL_MDR_FLAG ^ LITERAL_DR_FLAG))); - return id & ~LITERAL_DR_FLAG; -} - /* Allocation of Rose literal ids * * The rose literal id space is segmented: @@ -87,16 +64,6 @@ ReportID literalToReport(u32 id) { * | | Delayed version of normal literals * | | * ---- literalCount - * ... - * ... - * ... - * ---- LITERAL_DR_FLAG - * | | Direct Report literals: immediately raise an internal report with id - * | | given by (lit_id & ~LITERAL_DR_FLAG). Raised by a or f tables (or e??). - * | | No literal programs. - * | | - * | | - * ---- */ /* Rose Literal Sources @@ -317,14 +284,12 @@ struct RoseBoundaryReports { * -# small block table * -# array of NFA offsets, one per queue * -# array of state offsets, one per queue (+) - * -# multi-direct report array * * (+) stateOffset array note: Offsets in the array are either into the stream * state (normal case) or into the tstate region of scratch (for transient rose * nfas). Rose nfa info table can distinguish the cases. */ struct RoseEngine { - u8 hasFloatingDirectReports; // has at least one floating direct report literal u8 noFloatingRoots; /* only need to run the anchored table if something * matched in the anchored table */ u8 requiresEodCheck; /* stuff happens at eod time */ @@ -339,6 +304,7 @@ struct RoseEngine { SOM precision) */ u8 simpleCallback; /**< has only external reports with no bounds checks, plus no exhaustion keys */ + u8 needsCatchup; /** catch up needs to be run on every report. */ u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */ u32 historyRequired; /**< max amount of history required for streaming */ u32 ekeyCount; /**< number of exhaustion keys */ @@ -392,7 +358,6 @@ struct RoseEngine { */ u32 literalCount; - u32 multidirectOffset; /**< offset of multi-direct report list. 
*/ u32 activeArrayCount; //number of nfas tracked in the active array u32 activeLeftCount; //number of nfas tracked in the active rose array u32 queueCount; /**< number of nfa queues */ @@ -432,18 +397,12 @@ struct RoseEngine { u32 floatingMinDistance; /* start of region to run floating table over */ u32 smallBlockDistance; /* end of region to run the floating table over ROSE_BOUND_INF if not bounded */ - u32 maxSafeAnchoredDROffset; /* the maximum offset that we can safely raise - * a direct report from the anchored table - * without delaying it */ u32 floatingMinLiteralMatchOffset; /* the minimum offset that we can get a * 'valid' match from the floating * table */ u32 nfaInfoOffset; /* offset to the nfa info offset array */ - u32 anchoredReportMapOffset; /* am_log index --> reportid */ - u32 anchoredReportInverseMapOffset; /* reportid --> am_log index */ rose_group initialGroups; u32 size; // (bytes) - u32 anchoredMatches; /* number of anchored roles generating matches */ u32 delay_count; /* number of delayed literal ids. */ u32 delay_base_id; /* literal id of the first delayed literal. * delayed literal ids are contiguous */ diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 36a9aabda..7ac0360b5 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -77,6 +77,9 @@ enum RoseInstructionCode { /** \brief Fire an exhaustible SOM report. */ ROSE_INSTR_REPORT_SOM_EXHAUST, + /** \brief Super-instruction combining DEDUPE and REPORT. */ + ROSE_INSTR_DEDUPE_AND_REPORT, + ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set. ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length. ROSE_INSTR_SET_STATE, //!< Switch a state index on. @@ -230,12 +233,17 @@ struct ROSE_STRUCT_REPORT_SOM_AWARE { struct ROSE_STRUCT_REPORT { u8 code; //!< From enum RoseInstructionCode. - ReportID report; + ReportID report; //!< Internal report ID (used for assertions). + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. }; struct ROSE_STRUCT_REPORT_EXHAUST { u8 code; //!< From enum RoseInstructionCode. - ReportID report; + ReportID report; //!< Internal report ID (used for assertions). + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. + u32 ekey; //!< Exhaustion key. }; struct ROSE_STRUCT_REPORT_SOM { @@ -253,6 +261,12 @@ struct ROSE_STRUCT_REPORT_SOM_EXT { ReportID report; }; +struct ROSE_STRUCT_DEDUPE_AND_REPORT { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_CHECK_EXHAUSTED { u8 code; //!< From enum RoseInstructionCode. u32 ekey; //!< Exhaustion key to check. 
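To make the new super-instruction concrete: DEDUPE_AND_REPORT behaves like a
DEDUPE whose success path immediately fires the report, using the same
fail_jump convention as the other conditional instructions (jump forward that
many bytes when the check fails, as wired up in flattenProgram() above). The
following is a minimal sketch of the control flow only; attempt_dedupe() and
the standalone types are hypothetical stand-ins, not the real Rose program
interpreter.

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t ReportID;

    /* Mirrors ROSE_STRUCT_DEDUPE_AND_REPORT above. */
    struct dedupe_and_report {
        uint8_t code;       /* ROSE_INSTR_DEDUPE_AND_REPORT */
        ReportID report;
        uint32_t fail_jump; /* jump forward this many bytes on failure */
    };

    /* Hypothetical dedupe predicate: returns 1 if the match at this offset
     * is novel and should be reported, 0 if it is a duplicate. */
    static int attempt_dedupe(ReportID report, uint64_t offset) {
        (void)report;
        (void)offset;
        return 1;
    }

    /* Execute one DEDUPE_AND_REPORT; returns the PC to resume at. */
    static const char *exec_dedupe_and_report(const char *pc,
                                              uint64_t offset) {
        const struct dedupe_and_report *ri =
            (const struct dedupe_and_report *)pc;
        if (!attempt_dedupe(ri->report, offset)) {
            return pc + ri->fail_jump; /* duplicate: skip the report block */
        }
        printf("firing report %u at offset %llu\n", (unsigned)ri->report,
               (unsigned long long)offset);
        return pc + sizeof(*ri);
    }

    int main(void) {
        struct dedupe_and_report ri = { 0, 42, sizeof(ri) };
        exec_dedupe_and_report((const char *)&ri, 100);
        return 0;
    }

The fusion saves one dispatch (and one fail_jump fixup) per report in the
common case where a dedupe check immediately precedes its report.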
diff --git a/src/rose/runtime.h b/src/rose/runtime.h index e7e953774..275adfb4a 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -80,16 +80,6 @@ u8 *getActiveLeftArray(const struct RoseEngine *t, char *state) { return (u8 *)(state + t->stateOffsets.activeLeftArray); } -static really_inline -const u32 *getAnchoredInverseMap(const struct RoseEngine *t) { - return (const u32 *)(((const u8 *)t) + t->anchoredReportInverseMapOffset); -} - -static really_inline -const u32 *getAnchoredMap(const struct RoseEngine *t) { - return (const u32 *)(((const u8 *)t) + t->anchoredReportMapOffset); -} - static really_inline rose_group loadGroups(const struct RoseEngine *t, const char *state) { return partial_load_u64a(state + t->stateOffsets.groups, @@ -167,8 +157,6 @@ const struct internal_report *getInternalReport(const struct RoseEngine *t, return reports + intId; } -#define ANCHORED_MATCH_SENTINEL (~0U) - static really_inline void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) { DEBUG_PRINTF("match @%llu, last match @%llu\n", offset, diff --git a/src/rose/stream.c b/src/rose/stream.c index 161e00596..759f75532 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -396,7 +396,7 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, u64a offset) { struct RoseContext *tctxt = &scratch->tctxt; - if (roseCatchUpTo(t, scratch, length + scratch->core_info.buf_offset, 0) == + if (roseCatchUpTo(t, scratch, length + scratch->core_info.buf_offset) == HWLM_TERMINATE_MATCHING) { return; /* dead; no need to clean up state. */ } @@ -429,6 +429,10 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, assert(scratch->core_info.hbuf); assert(scratch->core_info.buf); + // We should not have been called if we've already been told to terminate + // matching. + assert(!told_to_stop_matching(scratch)); + assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount) < MAX_SPARSE_ITER_STATES); @@ -459,13 +463,10 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; tctxt->next_mpv_offset = 0; - tctxt->curr_anchored_loc = MMB_INVALID; - tctxt->curr_row_offset = 0; DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu\n", scratch->core_info.hlen, scratch->core_info.len); fatbit_clear(scratch->aqa); - scratch->am_log_sum = 0; /* clear the anchored logs */ scratch->al_log_sum = 0; scratch->catchup_pq.qm_size = 0; @@ -484,8 +485,6 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, if (can_stop_matching(scratch)) { goto exit; } - - resetAnchoredLog(t, scratch); } const struct HWLM *ftable = getFLiteralMatcher(t); diff --git a/src/runtime.c b/src/runtime.c index 24ee90f08..5f3579186 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -159,126 +159,26 @@ void setStreamStatus(char *state, u8 status) { *(u8 *)state = status; } -static really_inline -hwlmcb_rv_t multiDirectAdaptor(u64a real_end, ReportID direct_id, void *context, - struct core_info *ci, char is_simple, - char do_som) { - // Multi-direct report, list of reports indexed by the ID. 
- u32 mdr_offset = direct_id & ~LITERAL_MDR_FLAG; - const struct RoseEngine *t = ci->rose; - const ReportID *id - = (const ReportID *)((const char *)t + t->multidirectOffset) - + mdr_offset; - for (; *id != MO_INVALID_IDX; id++) { - int rv = roseAdaptor_i(real_end, *id, context, is_simple, do_som); - if (rv == MO_HALT_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - return HWLM_CONTINUE_MATCHING; -} - static int roseAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { return roseAdaptor_i(offset, id, scratch, 0, 0); } -static -hwlmcb_rv_t hwlmAdaptor(UNUSED size_t start, size_t end, u32 direct_id, - void *context) { - struct hs_scratch *scratch = (struct hs_scratch *)context; - struct core_info *ci = &scratch->core_info; - u64a real_end = (u64a)end + ci->buf_offset + 1; - - if (isLiteralMDR(direct_id)) { - return multiDirectAdaptor(real_end, direct_id, context, ci, 0, 0); - } - - ReportID id = literalToReport(direct_id); - int rv = roseAdaptor_i(real_end, id, context, 0, 0); - if (rv == MO_CONTINUE_MATCHING || rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } else { - return HWLM_TERMINATE_MATCHING; - } -} - static int roseSimpleAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { return roseAdaptor_i(offset, id, scratch, 1, 0); } -static -hwlmcb_rv_t hwlmSimpleAdaptor(UNUSED size_t start, size_t end, u32 direct_id, - void *context) { - struct hs_scratch *scratch = (struct hs_scratch *)context; - struct core_info *ci = &scratch->core_info; - u64a real_end = (u64a)end + ci->buf_offset + 1; - - if (isLiteralMDR(direct_id)) { - return multiDirectAdaptor(real_end, direct_id, context, ci, 1, 0); - } - - // Single direct report. - ReportID id = literalToReport(direct_id); - int rv = roseAdaptor_i(real_end, id, context, 1, 0); - if (rv == MO_CONTINUE_MATCHING || rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } else { - return HWLM_TERMINATE_MATCHING; - } -} - static int roseSomAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { return roseAdaptor_i(offset, id, scratch, 0, 1); } -static -hwlmcb_rv_t hwlmSomAdaptor(UNUSED size_t start, size_t end, u32 direct_id, - void *context) { - struct hs_scratch *scratch = (struct hs_scratch *)context; - struct core_info *ci = &scratch->core_info; - u64a real_end = (u64a)end + ci->buf_offset + 1; - - if (isLiteralMDR(direct_id)) { - return multiDirectAdaptor(real_end, direct_id, context, ci, 0, 1); - } - - ReportID id = literalToReport(direct_id); - int rv = roseAdaptor_i(real_end, id, context, 0, 1); - if (rv == MO_CONTINUE_MATCHING || rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } else { - return HWLM_TERMINATE_MATCHING; - } -} - static int roseSimpleSomAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { return roseAdaptor_i(offset, id, scratch, 1, 1); } -static -hwlmcb_rv_t hwlmSimpleSomAdaptor(UNUSED size_t start, size_t end, u32 direct_id, - void *context) { - struct hs_scratch *scratch = (struct hs_scratch *)context; - struct core_info *ci = &scratch->core_info; - u64a real_end = (u64a)end + ci->buf_offset + 1; - - if (isLiteralMDR(direct_id)) { - return multiDirectAdaptor(real_end, direct_id, context, ci, 1, 1); - } - - ReportID id = literalToReport(direct_id); - int rv = roseAdaptor_i(real_end, id, context, 1, 1); - if (rv == MO_CONTINUE_MATCHING || rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } else { - return HWLM_TERMINATE_MATCHING; - } -} - static really_inline 
RoseCallback selectAdaptor(const struct RoseEngine *rose) { const char is_simple = rose->simpleCallback; @@ -291,18 +191,6 @@ RoseCallback selectAdaptor(const struct RoseEngine *rose) { } } -static really_inline -HWLMCallback selectHwlmAdaptor(const struct RoseEngine *rose) { - const char is_simple = rose->simpleCallback; - const char do_som = rose->hasSom; - - if (do_som) { - return is_simple ? hwlmSimpleSomAdaptor : hwlmSomAdaptor; - } else { - return is_simple ? hwlmSimpleAdaptor : hwlmAdaptor; - } -} - static int roseSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id, struct hs_scratch *scratch) { @@ -372,14 +260,21 @@ SomNfaCallback selectOutfixSomAdaptor(const struct RoseEngine *rose) { return is_simple ? outfixSimpleSomSomAdaptor : outfixSomSomAdaptor; } +/** + * \brief Fire callbacks for a boundary report list. + * + * Returns MO_HALT_MATCHING if the user has instructed us to halt, and + * MO_CONTINUE_MATCHING otherwise. + */ + static never_inline -void processReportList(const struct RoseEngine *rose, u32 base_offset, - u64a stream_offset, hs_scratch_t *scratch) { +int processReportList(const struct RoseEngine *rose, u32 base_offset, + u64a stream_offset, hs_scratch_t *scratch) { DEBUG_PRINTF("running report list at offset %u\n", base_offset); if (told_to_stop_matching(scratch)) { DEBUG_PRINTF("matching has been terminated\n"); - return; + return MO_HALT_MATCHING; } if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) { @@ -393,20 +288,27 @@ void processReportList(const struct RoseEngine *rose, u32 base_offset, scratch->deduper.som_log_dirty = 0; } - const ReportID *report = - (const ReportID *)((const char *)rose + base_offset); + const ReportID *report = getByOffset(rose, base_offset); /* never required to do som as vacuous reports are always external */ if (rose->simpleCallback) { for (; *report != MO_INVALID_IDX; report++) { - roseSimpleAdaptor(stream_offset, *report, scratch); + int rv = roseSimpleAdaptor(stream_offset, *report, scratch); + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } } } else { for (; *report != MO_INVALID_IDX; report++) { - roseAdaptor(stream_offset, *report, scratch); + int rv = roseAdaptor(stream_offset, *report, scratch); + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } } } + + return MO_CONTINUE_MATCHING; } /** \brief Initialise SOM state. Used in both block and streaming mode. 
*/ @@ -443,13 +345,13 @@ void pureLiteralBlockExec(const struct RoseEngine *rose, size_t length = scratch->core_info.len; DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl); - hwlmExec(ftable, buffer, length, 0, selectHwlmAdaptor(rose), scratch, + hwlmExec(ftable, buffer, length, 0, rosePureLiteralCallback, scratch, rose->initialGroups); } static really_inline -void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, - struct hs_scratch *scratch) { +void initOutfixQueue(struct mq *q, u32 qi, const struct RoseEngine *t, + struct hs_scratch *scratch) { const struct NfaInfo *info = getNfaInfoByQueue(t, qi); q->nfa = getNfaByInfo(t, info); q->end = 0; @@ -492,7 +394,7 @@ void soleOutfixBlockExec(const struct RoseEngine *t, } struct mq *q = scratch->queues; - initQueue(q, 0, t, scratch); + initOutfixQueue(q, 0, t, scratch); q->length = len; /* adjust for rev_accel */ nfaQueueInitState(nfa, q); pushQueueAt(q, 0, MQE_START, 0); @@ -579,6 +481,11 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, clearEvec(scratch->core_info.exhaustionVector, rose); + // Rose program execution (used for some report paths) depends on these + // values being initialised. + scratch->tctxt.lastMatchOffset = 0; + scratch->tctxt.minMatchOffset = 0; + if (!length) { if (rose->boundary.reportZeroEodOffset) { processReportList(rose, rose->boundary.reportZeroEodOffset, 0, @@ -588,7 +495,11 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, } if (rose->boundary.reportZeroOffset) { - processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch); + int rv = processReportList(rose, rose->boundary.reportZeroOffset, 0, + scratch); + if (rv == MO_HALT_MATCHING) { + goto set_retval; + } } if (rose->minWidthExcludingBoundaries > length) { @@ -648,7 +559,8 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, } if (rose->boundary.reportEodOffset) { - processReportList(rose, rose->boundary.reportEodOffset, length, scratch); + processReportList(rose, rose->boundary.reportEodOffset, length, + scratch); } set_retval: @@ -782,7 +694,7 @@ void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) { const struct NFA *nfa = getNfaByQueue(t, 0); struct mq *q = scratch->queues; - initQueue(q, 0, t, scratch); + initOutfixQueue(q, 0, t, scratch); if (!scratch->core_info.buf_offset) { DEBUG_PRINTF("buf_offset is zero\n"); return; /* no vacuous engines */ @@ -821,13 +733,21 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, if (!id->offset) { if (rose->boundary.reportZeroEodOffset) { - processReportList(rose, rose->boundary.reportZeroEodOffset, 0, - scratch); + int rv = processReportList(rose, rose->boundary.reportZeroEodOffset, + 0, scratch); + if (rv == MO_HALT_MATCHING) { + scratch->core_info.status |= STATUS_TERMINATED; + return; + } } } else { if (rose->boundary.reportEodOffset) { - processReportList(rose, rose->boundary.reportEodOffset, + int rv = processReportList(rose, rose->boundary.reportEodOffset, id->offset, scratch); + if (rv == MO_HALT_MATCHING) { + scratch->core_info.status |= STATUS_TERMINATED; + return; + } } if (rose->requiresEodCheck) { @@ -962,7 +882,7 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, // start the match region at zero. 
const size_t start = 0; - hwlmExecStreaming(ftable, scratch, len2, start, selectHwlmAdaptor(rose), + hwlmExecStreaming(ftable, scratch, len2, start, rosePureLiteralCallback, scratch, rose->initialGroups, hwlm_stream_state); if (!told_to_stop_matching(scratch) && @@ -988,7 +908,7 @@ void soleOutfixStreamExec(struct hs_stream *stream_state, const struct NFA *nfa = getNfaByQueue(t, 0); struct mq *q = scratch->queues; - initQueue(q, 0, t, scratch); + initOutfixQueue(q, 0, t, scratch); if (!scratch->core_info.buf_offset) { nfaQueueInitState(nfa, q); pushQueueAt(q, 0, MQE_START, 0); @@ -1045,6 +965,11 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, assert(scratch->core_info.hlen <= id->offset && scratch->core_info.hlen <= rose->historyRequired); + // Rose program execution (used for some report paths) depends on these + // values being initialised. + scratch->tctxt.lastMatchOffset = 0; + scratch->tctxt.minMatchOffset = id->offset; + prefetch_data(data, length); if (rose->somLocationCount) { diff --git a/src/scratch.c b/src/scratch.c index 854fc3125..42db42acd 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -75,8 +75,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { u32 bStateSize = proto->bStateSize; u32 tStateSize = proto->tStateSize; u32 fullStateSize = proto->fullStateSize; - u32 anchored_region_len = proto->anchored_region_len; - u32 anchored_region_width = proto->anchored_region_width; u32 anchored_literal_region_len = proto->anchored_literal_region_len; u32 anchored_literal_region_width = proto->anchored_literal_count; @@ -90,11 +88,8 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { size_t queue_size = queueCount * sizeof(struct mq); size_t qmpq_size = queueCount * sizeof(struct queue_match); - assert(anchored_region_len < 8 * sizeof(s->am_log_sum)); - assert(anchored_literal_region_len < 8 * sizeof(s->am_log_sum)); + assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum)); - size_t anchored_region_size = - fatbit_array_size(anchored_region_len, anchored_region_width); size_t anchored_literal_region_size = fatbit_array_size( anchored_literal_region_len, anchored_literal_region_width); size_t delay_region_size = @@ -109,7 +104,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { + 2 * fatbit_size(deduperCount) /* need odd and even logs */ + 2 * fatbit_size(deduperCount) /* ditto som logs */ + 2 * sizeof(u64a) * deduperCount /* start offsets for som */ - + anchored_region_size + anchored_literal_region_size + qmpq_size + delay_region_size + som_store_size @@ -165,16 +159,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { current += fatbit_size(proto->delay_count); } - current = ROUNDUP_PTR(current, alignof(struct fatbit *)); - s->am_log = (struct fatbit **)current; - current += sizeof(struct fatbit *) * anchored_region_len; - current = ROUNDUP_PTR(current, alignof(struct fatbit)); - for (u32 i = 0; i < anchored_region_len; i++) { - s->am_log[i] = (struct fatbit *)current; - assert(ISALIGNED(s->am_log[i])); - current += fatbit_size(anchored_region_width); - } - current = ROUNDUP_PTR(current, alignof(struct fatbit *)); s->al_log = (struct fatbit **)current; current += sizeof(struct fatbit *) * anchored_literal_region_len; @@ -295,22 +279,6 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { } proto->scratch_alloc = (char *)proto_tmp; - u32 max_anchored_match = rose->anchoredDistance; - if 
(max_anchored_match > rose->maxSafeAnchoredDROffset) { - u32 anchored_region_len = max_anchored_match - - rose->maxSafeAnchoredDROffset; - if (anchored_region_len > proto->anchored_region_len) { - resize = 1; - proto->anchored_region_len = anchored_region_len; - } - } - - u32 anchored_region_width = rose->anchoredMatches; - if (anchored_region_width > proto->anchored_region_width) { - resize = 1; - proto->anchored_region_width = anchored_region_width; - } - if (rose->anchoredDistance > proto->anchored_literal_region_len) { resize = 1; proto->anchored_literal_region_len = rose->anchoredDistance; diff --git a/src/scratch.h b/src/scratch.h index 48b3de7d1..99cdb3700 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -122,8 +122,6 @@ struct RoseContext { RoseCallback cb; RoseCallbackSom cb_som; u32 filledDelayedSlots; - u32 curr_anchored_loc; /**< last read/written row */ - u32 curr_row_offset; /**< last read/written entry */ u32 curr_qi; /**< currently executing main queue index during * \ref nfaQueueExec */ }; @@ -158,15 +156,11 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid * & active */ struct fatbit **delay_slots; - struct fatbit **am_log; struct fatbit **al_log; - u64a am_log_sum; u64a al_log_sum; struct catchup_pq catchup_pq; struct core_info core_info; struct match_deduper deduper; - u32 anchored_region_len; - u32 anchored_region_width; u32 anchored_literal_region_len; u32 anchored_literal_count; u32 delay_count; @@ -192,11 +186,6 @@ struct hs_scratch *tctxtToScratch(struct RoseContext *tctxt) { ((char *)tctxt - offsetof(struct hs_scratch, tctxt)); } -static really_inline -struct fatbit **getAnchoredLog(struct hs_scratch *scratch) { - return scratch->am_log; -} - /* array of fatbit ptr; TODO: why not an array of fatbits? */ static really_inline struct fatbit **getAnchoredLiteralLog(struct hs_scratch *scratch) { diff --git a/src/scratch_dump.cpp b/src/scratch_dump.cpp index 00e09a838..78a854bb9 100644 --- a/src/scratch_dump.cpp +++ b/src/scratch_dump.cpp @@ -56,12 +56,6 @@ void dumpScratch(const struct hs_scratch *s, FILE *f) { fprintf(f, " bStateSize : %u bytes\n", s->bStateSize); fprintf(f, " active queue array : %u bytes\n", mmbit_size(s->queueCount)); - - size_t anchored_region_size = - s->anchored_region_len * mmbit_size(s->anchored_region_width) + - sizeof(u8 *) + mmbit_size(s->anchored_region_len); - - fprintf(f, " anchored region : %zu bytes\n", anchored_region_size); fprintf(f, " qmpq : %zu bytes\n", s->queueCount * sizeof(struct queue_match)); fprintf(f, " delay info : %u bytes\n", From 12921b7c979b52151e9c6c358ccadfb358bf2dbc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 23 Oct 2015 17:15:24 +1100 Subject: [PATCH 105/218] Add hs_expression_ext_info() API function This is a variant of hs_expression_info() that can also accept extended parameters. 
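A usage sketch of the new entry point, assuming the hs_expr_ext_t fields and
the HS_EXT_FLAG_MIN_LENGTH flag defined in hs_compile.h:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #include <hs.h>

    int main(void) {
        /* Constrain matches of "foo.*bar" to be at least 20 bytes wide. */
        hs_expr_ext_t ext;
        memset(&ext, 0, sizeof(ext));
        ext.flags = HS_EXT_FLAG_MIN_LENGTH;
        ext.min_length = 20;

        hs_expr_info_t *info = NULL;
        hs_compile_error_t *compile_err = NULL;
        hs_error_t err = hs_expression_ext_info("foo.*bar", 0, &ext, &info,
                                                &compile_err);
        if (err != HS_SUCCESS) {
            fprintf(stderr, "error: %s\n", compile_err->message);
            hs_free_compile_error(compile_err);
            return 1;
        }

        printf("min_width=%u max_width=%u\n", info->min_width,
               info->max_width);
        free(info); /* allocated with malloc unless hs_set_allocator used */
        return 0;
    }

Per the ng_expr_info.cpp change below, the min_length bound is folded into
the reported widths: min_width for "foo.*bar" rises from 6 to 20, while
max_width stays UINT_MAX.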
--- src/hs.cpp | 18 ++++- src/hs_compile.h | 63 ++++++++++++++- src/nfagraph/ng_expr_info.cpp | 33 ++++++-- unit/hyperscan/arg_checks.cpp | 31 +++++++ unit/hyperscan/expr_info.cpp | 148 ++++++++++++++++++++++++---------- 5 files changed, 238 insertions(+), 55 deletions(-) diff --git a/src/hs.cpp b/src/hs.cpp index e665539b4..3680e79eb 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -39,6 +39,7 @@ #include "compiler/error.h" #include "nfagraph/ng.h" #include "nfagraph/ng_expr_info.h" +#include "nfagraph/ng_extparam.h" #include "parser/parse_error.h" #include "parser/Parser.h" #include "parser/prefilter.h" @@ -310,7 +311,8 @@ hs_error_t hs_compile_ext_multi(const char * const *expressions, static hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, - unsigned int mode, hs_expr_info_t **info, + const hs_expr_ext_t *ext, unsigned int mode, + hs_expr_info_t **info, hs_compile_error_t **error) { if (!error) { // nowhere to write an error, but we can still return an error code. @@ -347,7 +349,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, } ReportManager rm(cc.grey); - ParsedExpression pe(0, expression, flags, 0); + ParsedExpression pe(0, expression, flags, 0, ext); assert(pe.component); // Apply prefiltering transformations if desired. @@ -362,6 +364,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, throw ParseError("Internal error."); } + handleExtendedParams(rm, *g, cc); fillExpressionInfo(rm, *g, &local_info); } catch (const CompileError &e) { @@ -394,7 +397,16 @@ extern "C" HS_PUBLIC_API hs_error_t hs_expression_info(const char *expression, unsigned int flags, hs_expr_info_t **info, hs_compile_error_t **error) { - return hs_expression_info_int(expression, flags, HS_MODE_BLOCK, info, + return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK, + info, error); +} + +extern "C" HS_PUBLIC_API +hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, + const hs_expr_ext_t *ext, + hs_expr_info_t **info, + hs_compile_error_t **error) { + return hs_expression_info_int(expression, flags, ext, HS_MODE_BLOCK, info, error); } diff --git a/src/hs_compile.h b/src/hs_compile.h index 68f385840..48168cc2a 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -158,7 +158,7 @@ typedef struct hs_platform_info { /** * A type containing information related to an expression that is returned by - * @ref hs_expression_info(). + * @ref hs_expression_info() or @ref hs_expression_ext_info. */ typedef struct hs_expr_info { /** @@ -201,7 +201,8 @@ typedef struct hs_expr_info { /** * A structure containing additional parameters related to an expression, - * passed in at build time to @ref hs_compile_ext_multi(). + * passed in at build time to @ref hs_compile_ext_multi() or @ref + * hs_expression_ext_info. * * These parameters allow the set of matches produced by a pattern to be * constrained at compile time, rather than relying on the application to @@ -401,7 +402,7 @@ hs_error_t hs_compile_multi(const char *const *expressions, hs_database_t **db, hs_compile_error_t **error); /** - * The multiple regular expression compiler with extended pattern support. + * The multiple regular expression compiler with extended parameter support. 
* * This function call compiles a group of expressions into a database in the * same way as @ref hs_compile_multi(), but allows additional parameters to be @@ -550,6 +551,62 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags, hs_expr_info_t **info, hs_compile_error_t **error); +/** + * Utility function providing information about a regular expression, with + * extended parameter support. The information provided in @ref hs_expr_info_t + * includes the minimum and maximum width of a pattern match. + * + * @param expression + * The NULL-terminated expression to parse. Note that this string must + * represent ONLY the pattern to be matched, with no delimiters or flags; + * any global flags should be specified with the @a flags argument. For + * example, the expression `/abc?def/i` should be compiled by providing + * `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a + * flags. + * + * @param flags + * Flags which modify the behaviour of the expression. Multiple flags may + * be used by ORing them together. Valid values are: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines. + * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated by the + * expression per stream. + * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an + * empty string, such as `.*`. + * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters. + * - HS_FLAG_UCP - Use Unicode properties for character classes. + * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found. + * + * @param ext + * A pointer to a filled @ref hs_expr_ext_t structure that defines + * extended behaviour for this pattern. NULL may be specified if no + * extended parameters are needed. + * + * @param info + * On success, a pointer to the pattern information will be returned in + * this parameter, or NULL on failure. This structure is allocated using + * the allocator supplied in @ref hs_set_allocator() (or malloc() if no + * allocator was set) and should be freed by the caller. + * + * @param error + * If the call fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the error + * parameter. + */ +hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, + const hs_expr_ext_t *ext, + hs_expr_info_t **info, + hs_compile_error_t **error); + /** * Populates the platform information based on the current host. 
* diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index 2afb568be..cfd34ce6a 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -94,11 +94,34 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, const DepthMinMax &d = depths.at(idx); for (ReportID report_id : w[v].reports) { - const Report &ir = rm.getReport(report_id); - assert(ir.type == EXTERNAL_CALLBACK); - s32 adjust = ir.offsetAdjust; - info.min = min(info.min, d.min + adjust); - info.max = max(info.max, d.max + adjust); + const Report &report = rm.getReport(report_id); + assert(report.type == EXTERNAL_CALLBACK); + + DepthMinMax rd = d; + + // Compute graph width to this report, taking any offset adjustment + // into account. + rd.min += report.offsetAdjust; + rd.max += report.offsetAdjust; + + // A min_length param is a lower bound for match width. + if (report.minLength && report.minLength <= depth::max_value()) { + depth min_len((u32)report.minLength); + rd.min = max(rd.min, min_len); + rd.max = max(rd.max, min_len); + } + + // A max_offset param is an upper bound for match width. + if (report.maxOffset && report.maxOffset <= depth::max_value()) { + depth max_offset((u32)report.maxOffset); + rd.min = min(rd.min, max_offset); + rd.max = min(rd.max, max_offset); + } + + DEBUG_PRINTF("vertex %u report %u: %s\n", w[v].index, report_id, + rd.str().c_str()); + + info = unionDepthMinMax(info, rd); } } diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp index dbc692c54..6d4e5fa9a 100644 --- a/unit/hyperscan/arg_checks.cpp +++ b/unit/hyperscan/arg_checks.cpp @@ -2154,6 +2154,37 @@ TEST(HyperscanArgChecks, ExprInfoNullErrPtr) { EXPECT_TRUE(info == nullptr); } +// hs_expression_ext_info: Compile a NULL pattern +TEST(HyperscanArgChecks, ExprExtInfoNullExpression) { + hs_expr_info_t *info = nullptr; + hs_compile_error_t *compile_err = nullptr; + hs_error_t err = + hs_expression_ext_info(nullptr, 0, nullptr, &info, &compile_err); + EXPECT_EQ(HS_COMPILER_ERROR, err); + EXPECT_TRUE(info == nullptr); + EXPECT_TRUE(compile_err != nullptr); + hs_free_compile_error(compile_err); +} + +// hs_expression_ext_info: NULL info block ptr +TEST(HyperscanArgChecks, ExprExtInfoNullInfoPtr) { + hs_compile_error_t *compile_err = nullptr; + hs_error_t err = + hs_expression_ext_info("foobar", 0, nullptr, nullptr, &compile_err); + EXPECT_EQ(HS_COMPILER_ERROR, err); + EXPECT_TRUE(compile_err != nullptr); + hs_free_compile_error(compile_err); +} + +// hs_expression_ext_info: No compiler error block +TEST(HyperscanArgChecks, ExprExtInfoNullErrPtr) { + hs_expr_info_t *info = nullptr; + hs_error_t err = + hs_expression_ext_info("foobar", 0, nullptr, &info, nullptr); + EXPECT_EQ(HS_COMPILER_ERROR, err); + EXPECT_TRUE(info == nullptr); +} + TEST(HyperscanArgChecks, hs_free_database_null) { hs_error_t err = hs_free_database(nullptr); ASSERT_EQ(HS_SUCCESS, err); diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index 4d4a1c97c..984104c55 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -42,6 +42,8 @@ namespace /* anonymous */ { struct expected_info { const char *pattern; + hs_expr_ext ext; + unsigned min; unsigned max; char unordered_matches; @@ -52,10 +54,25 @@ struct expected_info { class ExprInfop : public TestWithParam { }; -TEST_P(ExprInfop, width) { +static +void check_info(const expected_info &ei, const hs_expr_info_t *info) { + EXPECT_EQ(ei.min, info->min_width); + EXPECT_EQ(ei.max, info->max_width); + 
EXPECT_EQ(ei.unordered_matches, info->unordered_matches); + EXPECT_EQ(ei.matches_at_eod, info->matches_at_eod); + EXPECT_EQ(ei.matches_only_at_eod, info->matches_only_at_eod); +} + +// Check with hs_expression_info function. +TEST_P(ExprInfop, check_no_ext) { const expected_info &ei = GetParam(); SCOPED_TRACE(ei.pattern); + if (ei.ext.flags) { + // This is an extparam test, skip it. + return; + } + hs_expr_info_t *info = nullptr; hs_compile_error_t *c_err = nullptr; hs_error_t err = hs_expression_info(ei.pattern, 0, &info, &c_err); @@ -63,54 +80,97 @@ TEST_P(ExprInfop, width) { ASSERT_TRUE(info != nullptr); ASSERT_TRUE(c_err == nullptr); - EXPECT_EQ(ei.min, info->min_width); - EXPECT_EQ(ei.max, info->max_width); - EXPECT_EQ(ei.unordered_matches, info->unordered_matches); - EXPECT_EQ(ei.matches_at_eod, info->matches_at_eod); - EXPECT_EQ(ei.matches_only_at_eod, info->matches_only_at_eod); + check_info(ei, info); + free(info); +} + +// Check with hs_expression_ext_info function. +TEST_P(ExprInfop, check_ext) { + const expected_info &ei = GetParam(); + SCOPED_TRACE(ei.pattern); + + hs_expr_info_t *info = nullptr; + hs_compile_error_t *c_err = nullptr; + hs_error_t err = + hs_expression_ext_info(ei.pattern, 0, &ei.ext, &info, &c_err); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(info != nullptr); + ASSERT_TRUE(c_err == nullptr); + + check_info(ei, info); + free(info); +} + +// Check with hs_expression_ext_info function and a nullptr ext param, for +// cases where ext.flags == 0. Functionally identical to check_no_ext above. +TEST_P(ExprInfop, check_ext_null) { + const expected_info &ei = GetParam(); + SCOPED_TRACE(ei.pattern); + + if (ei.ext.flags) { + // This is an extparam test, skip it. + return; + } + + hs_expr_info_t *info = nullptr; + hs_compile_error_t *c_err = nullptr; + hs_error_t err = + hs_expression_ext_info(ei.pattern, 0, nullptr, &info, &c_err); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(info != nullptr); + ASSERT_TRUE(c_err == nullptr); + check_info(ei, info); free(info); } +static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0 }; + static const expected_info ei_test[] = { - {"abc", 3, 3, 0, 0, 0}, - {"abc.*def", 6, UINT_MAX, 0, 0, 0}, - {"abc|defghi", 3, 6, 0, 0, 0}, - {"abc(def)?", 3, 6, 0, 0, 0}, - {"abc(def){0,3}", 3, 12, 0, 0, 0}, - {"abc(def){1,4}", 6, 15, 0, 0, 0}, - {"", 0, 0, 0, 0, 0}, - {"^", 0, 0, 0, 0, 0}, - {"^\\b", 0, 0, 1, 0, 0}, - {"\\b$", 0, 0, 1, 1, 1}, - {"(?m)\\b$", 0, 0, 1, 1, 0}, - {"\\A", 0, 0, 0, 0, 0}, - {"\\z", 0, 0, 0, 1, 1}, - {"\\Z", 0, 0, 1, 1, 1}, - {"$", 0, 0, 1, 1, 1}, - {"(?m)$", 0, 0, 1, 1, 0}, - {"^foo", 3, 3, 0, 0, 0}, - {"^foo.*bar", 6, UINT_MAX, 0, 0, 0}, - {"^foo.*bar?", 5, UINT_MAX, 0, 0, 0}, - {"^foo.*bar$", 6, UINT_MAX, 1, 1, 1}, - {"^foobar$", 6, 6, 1, 1, 1}, - {"foobar$", 6, 6, 1, 1, 1}, - {"^.*foo", 3, UINT_MAX, 0, 0, 0}, - {"foo\\b", 3, 3, 1, 1, 0}, - {"foo.{1,13}bar", 7, 19, 0, 0, 0}, - {"foo.{10,}bar", 16, UINT_MAX, 0, 0, 0}, - {"foo.{0,10}bar", 6, 16, 0, 0, 0}, - {"foo.{,10}bar", 12, 12, 0, 0, 0}, - {"foo.{10}bar", 16, 16, 0, 0, 0}, - {"(^|\n)foo", 3, 4, 0, 0, 0}, - {"(^\n|)foo", 3, 4, 0, 0, 0}, - {"(?m)^foo", 3, 3, 0, 0, 0}, - {"\\bfoo", 3, 3, 0, 0, 0}, - {"^\\bfoo", 3, 3, 0, 0, 0}, - {"(?m)^\\bfoo", 3, 3, 0, 0, 0}, - {"\\Bfoo", 3, 3, 0, 0, 0}, - {"(foo|bar\\z)", 3, 3, 0, 1, 0}, - {"(foo|bar)\\z", 3, 3, 0, 1, 1}, + {"abc", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"abc.*def", NO_EXT_PARAM, 6, UINT_MAX, 0, 0, 0}, + {"abc|defghi", NO_EXT_PARAM, 3, 6, 0, 0, 0}, + {"abc(def)?", NO_EXT_PARAM, 3, 6, 0, 0, 0}, + {"abc(def){0,3}", NO_EXT_PARAM, 3, 
12, 0, 0, 0}, + {"abc(def){1,4}", NO_EXT_PARAM, 6, 15, 0, 0, 0}, + {"", NO_EXT_PARAM, 0, 0, 0, 0, 0}, + {"^", NO_EXT_PARAM, 0, 0, 0, 0, 0}, + {"^\\b", NO_EXT_PARAM, 0, 0, 1, 0, 0}, + {"\\b$", NO_EXT_PARAM, 0, 0, 1, 1, 1}, + {"(?m)\\b$", NO_EXT_PARAM, 0, 0, 1, 1, 0}, + {"\\A", NO_EXT_PARAM, 0, 0, 0, 0, 0}, + {"\\z", NO_EXT_PARAM, 0, 0, 0, 1, 1}, + {"\\Z", NO_EXT_PARAM, 0, 0, 1, 1, 1}, + {"$", NO_EXT_PARAM, 0, 0, 1, 1, 1}, + {"(?m)$", NO_EXT_PARAM, 0, 0, 1, 1, 0}, + {"^foo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"^foo.*bar", NO_EXT_PARAM, 6, UINT_MAX, 0, 0, 0}, + {"^foo.*bar?", NO_EXT_PARAM, 5, UINT_MAX, 0, 0, 0}, + {"^foo.*bar$", NO_EXT_PARAM, 6, UINT_MAX, 1, 1, 1}, + {"^foobar$", NO_EXT_PARAM, 6, 6, 1, 1, 1}, + {"foobar$", NO_EXT_PARAM, 6, 6, 1, 1, 1}, + {"^.*foo", NO_EXT_PARAM, 3, UINT_MAX, 0, 0, 0}, + {"foo\\b", NO_EXT_PARAM, 3, 3, 1, 1, 0}, + {"foo.{1,13}bar", NO_EXT_PARAM, 7, 19, 0, 0, 0}, + {"foo.{10,}bar", NO_EXT_PARAM, 16, UINT_MAX, 0, 0, 0}, + {"foo.{0,10}bar", NO_EXT_PARAM, 6, 16, 0, 0, 0}, + {"foo.{,10}bar", NO_EXT_PARAM, 12, 12, 0, 0, 0}, + {"foo.{10}bar", NO_EXT_PARAM, 16, 16, 0, 0, 0}, + {"(^|\n)foo", NO_EXT_PARAM, 3, 4, 0, 0, 0}, + {"(^\n|)foo", NO_EXT_PARAM, 3, 4, 0, 0, 0}, + {"(?m)^foo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"^\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"(?m)^\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"\\Bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"(foo|bar\\z)", NO_EXT_PARAM, 3, 3, 0, 1, 0}, + {"(foo|bar)\\z", NO_EXT_PARAM, 3, 3, 0, 1, 1}, + + // Some cases with extended parameters. + {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100}, 100, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0}, }; INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test)); From b093616affdf75d8e2bd131ebc345db82d0d2e4f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 2 Mar 2016 13:05:33 +1100 Subject: [PATCH 106/218] Rose build: move HWLM build code to own file To reduce the size of rose_build_bytecode.cpp a little, move the code that deals with HWLM literal tables into its own new file. 
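The move also tidies the interface: fillHamsterLiteralList() now returns
its literal vector by value instead of filling a caller-supplied
out-parameter, and call sites are updated to match. A minimal sketch of
the call-site change, using the names from the diff below (the element
type hwlmLiteral is inferred here, since the declarations in this patch
appear with their template arguments stripped):

    // Before this patch: the caller owns the vector and passes it in.
    std::vector<hwlmLiteral> lits;
    fillHamsterLiteralList(build, ROSE_FLOATING, &lits);

    // After this patch: the list is returned by value; move semantics
    // keep this cheap, and results from several tables compose directly.
    auto lits = fillHamsterLiteralList(build, ROSE_FLOATING);
    auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK);
    lits.insert(lits.end(), lits2.begin(), lits2.end());
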
--- CMakeLists.txt | 2 + src/rose/rose_build_bytecode.cpp | 618 +---------------------------- src/rose/rose_build_dump.cpp | 20 +- src/rose/rose_build_impl.h | 4 - src/rose/rose_build_matchers.cpp | 662 +++++++++++++++++++++++++++++++ src/rose/rose_build_matchers.h | 63 +++ 6 files changed, 736 insertions(+), 633 deletions(-) create mode 100644 src/rose/rose_build_matchers.cpp create mode 100644 src/rose/rose_build_matchers.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 49c9a4add..36267fc61 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -810,6 +810,8 @@ SET (hs_SRCS src/rose/rose_build_infix.h src/rose/rose_build_lookaround.cpp src/rose/rose_build_lookaround.h + src/rose/rose_build_matchers.cpp + src/rose/rose_build_matchers.h src/rose/rose_build_merge.cpp src/rose/rose_build_merge.h src/rose/rose_build_misc.cpp diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 401c15c15..c2d9bcced 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -35,12 +35,12 @@ #include "rose_build_anchored.h" #include "rose_build_infix.h" #include "rose_build_lookaround.h" +#include "rose_build_matchers.h" #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" #include "rose_program.h" #include "hwlm/hwlm.h" /* engine types */ -#include "hwlm/hwlm_build.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" #include "nfa/mcclellancompile.h" @@ -68,7 +68,6 @@ #include "util/compile_error.h" #include "util/container.h" #include "util/graph_range.h" -#include "util/dump_charclass.h" #include "util/internal_report.h" #include "util/multibit_build.h" #include "util/order_check.h" @@ -78,11 +77,9 @@ #include "util/verify_types.h" #include -#include #include #include #include -#include #include #include #include @@ -1674,619 +1671,6 @@ u32 RoseBuildImpl::calcHistoryRequired() const { return m ? m - 1 : 0; } -#ifdef DEBUG -static UNUSED -string dumpMask(const vector &v) { - ostringstream oss; - for (u8 e : v) { - oss << setfill('0') << setw(2) << hex << (unsigned int)e; - } - return oss.str(); -} -#endif - -static -bool maskFromLeftGraph(const LeftEngInfo &left, vector &msk, - vector &cmp) { - const u32 lag = left.lag; - const ReportID report = left.leftfix_report; - - DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report); - - assert(left.graph); - const NGHolder &h = *left.graph; - assert(in_degree(h.acceptEod, h) == 1); // no eod reports - - // Start with the set of reporter vertices for this leftfix. 
- set curr; - for (auto u : inv_adjacent_vertices_range(h.accept, h)) { - if (contains(h[u].reports, report)) { - curr.insert(u); - } - } - assert(!curr.empty()); - - size_t i = HWLM_MASKLEN - lag - 1; - do { - if (curr.empty() || contains(curr, h.start) - || contains(curr, h.startDs)) { - DEBUG_PRINTF("end of the road\n"); - break; - } - - set next; - CharReach cr; - for (NFAVertex v : curr) { - const auto &v_cr = h[v].char_reach; - DEBUG_PRINTF("vertex %u, reach %s\n", h[v].index, - describeClass(v_cr).c_str()); - cr |= v_cr; - insert(&next, inv_adjacent_vertices(v, h)); - } - make_and_cmp_mask(cr, &msk.at(i), &cmp.at(i)); - DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i, - describeClass(cr).c_str(), msk[i], cmp[i]); - curr.swap(next); - } while (i-- > 0); - - return true; -} - -static -bool maskFromLeftCastle(const LeftEngInfo &left, vector &msk, - vector &cmp) { - const u32 lag = left.lag; - const ReportID report = left.leftfix_report; - - DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report); - - assert(left.castle); - const CastleProto &c = *left.castle; - - depth min_width(depth::infinity()); - for (const PureRepeat &repeat : c.repeats | map_values) { - if (contains(repeat.reports, report)) { - min_width = min(min_width, repeat.bounds.min); - } - } - - DEBUG_PRINTF("castle min width for this report is %s\n", - min_width.str().c_str()); - - if (!min_width.is_finite() || min_width == depth(0)) { - DEBUG_PRINTF("bad min width\n"); - return false; - } - - u32 len = min_width; - u32 end = HWLM_MASKLEN - lag; - for (u32 i = end; i > end - min(end, len); i--) { - make_and_cmp_mask(c.reach(), &msk.at(i - 1), &cmp.at(i - 1)); - } - - return true; -} - -static -bool maskFromLeft(const LeftEngInfo &left, vector &msk, vector &cmp) { - if (left.lag >= HWLM_MASKLEN) { - DEBUG_PRINTF("too much lag\n"); - return false; - } - - if (left.graph) { - return maskFromLeftGraph(left, msk, cmp); - } else if (left.castle) { - return maskFromLeftCastle(left, msk, cmp); - } - - return false; -} - -static -bool maskFromPreds(const RoseBuildImpl &tbi, const rose_literal_id &id, - const RoseVertex v, vector &msk, vector &cmp) { - const RoseGraph &g = tbi.g; - - // For right now, wuss out and only handle cases with one pred. - if (in_degree(v, g) != 1) { - return false; - } - - // Root successors have no literal before them. - if (tbi.isRootSuccessor(v)) { - return false; - } - - // If we have a single predecessor with a short bound, we may be able to - // fill out a mask with the trailing bytes of the previous literal. This - // allows us to improve literals like the 'bar' in 'fo.bar'. - - RoseEdge e = *(in_edges(v, g).first); - u32 bound = g[e].maxBound; - if (bound != g[e].minBound || bound >= HWLM_MASKLEN) { - return false; - } - - bound += id.s.length(); - if (bound >= HWLM_MASKLEN) { - return false; - } - - DEBUG_PRINTF("bound %u\n", bound); - - RoseVertex u = source(e, g); - if (g[u].literals.size() != 1) { - DEBUG_PRINTF("u has %zu literals\n", g[u].literals.size()); - return false; - } - - u32 u_lit_id = *(g[u].literals.begin()); - const rose_literal_id &u_id = tbi.literals.right.at(u_lit_id); - DEBUG_PRINTF("u has lit: %s\n", escapeString(u_id.s).c_str()); - - // Number of characters to take from the back of u's literal. 
- size_t u_len = u_id.s.length(); - size_t u_sublen = min(u_len, (size_t)HWLM_MASKLEN - bound); - - size_t i = HWLM_MASKLEN - (bound + u_sublen); - - ue2_literal::const_iterator it, ite; - for (it = u_id.s.begin() + (u_len - u_sublen), ite = u_id.s.end(); - it != ite; ++it) { - make_and_cmp_mask(*it, &msk.at(i), &cmp.at(i)); - ++i; - } - - return true; -} - -static -bool findHamsterMask(const RoseBuildImpl &tbi, const rose_literal_id &id, - const rose_literal_info &info, const RoseVertex v, - vector &msk, vector &cmp) { - // Start with zero masks. - msk.assign(HWLM_MASKLEN, 0); - cmp.assign(HWLM_MASKLEN, 0); - - // Masks can come from literal benefits (for mixed-case literals). - if (info.requires_benefits) { - assert(mixed_sensitivity(id.s)); - - size_t j = 0; - for (ue2_literal::const_reverse_iterator it = id.s.rbegin(), - ite = id.s.rend(); - it != ite && j < HWLM_MASKLEN; ++it, ++j) { - size_t offset = HWLM_MASKLEN - j - 1; - const CharReach &cr = *it; - make_and_cmp_mask(cr, &msk[offset], &cmp[offset]); - } - return true; - } - - const LeftEngInfo &left = tbi.g[v].left; - if (left && left.lag < HWLM_MASKLEN) { - if (maskFromLeft(left, msk, cmp)) { - DEBUG_PRINTF("mask from a leftfix!\n"); - return true; - } - } - - if (id.s.length() < HWLM_MASKLEN) { - if (maskFromPreds(tbi, id, v, msk, cmp)) { - DEBUG_PRINTF("mask from preds!\n"); - return true; - } - } - - return false; -} - -static -bool hamsterMaskCombine(vector &msk, vector &cmp, - const vector &v_msk, const vector &v_cmp) { - assert(msk.size() == HWLM_MASKLEN && cmp.size() == HWLM_MASKLEN); - assert(v_msk.size() == HWLM_MASKLEN && v_cmp.size() == HWLM_MASKLEN); - - u8 all_masks = 0; - - for (size_t i = 0; i < HWLM_MASKLEN; i++) { - u8 filter = ~(cmp[i] ^ v_cmp[i]); - msk[i] &= v_msk[i]; - msk[i] &= filter; - cmp[i] &= filter; - - all_masks |= msk[i]; - } - - // Return false if we have no bits on in any mask elements. - return all_masks != 0; -} - -static -bool findHamsterMask(const RoseBuildImpl &tbi, const rose_literal_id &id, - const rose_literal_info &info, - vector &msk, vector &cmp) { - if (!tbi.cc.grey.roseHamsterMasks) { - return false; - } - - if (!info.delayed_ids.empty()) { - // Not safe to add masks to delayed literals at this late stage. - return false; - } - - size_t num = 0; - vector v_msk, v_cmp; - - for (RoseVertex v : info.vertices) { - if (!findHamsterMask(tbi, id, info, v, v_msk, v_cmp)) { - DEBUG_PRINTF("no mask\n"); - return false; - } - - if (!num++) { - // First (or only) vertex, this becomes the mask/cmp pair. - msk = v_msk; - cmp = v_cmp; - } else { - // Multiple vertices with potentially different masks. We combine - // them into an 'advisory' mask. 
- if (!hamsterMaskCombine(msk, cmp, v_msk, v_cmp)) { - DEBUG_PRINTF("mask went to zero\n"); - return false; - } - } - } - - normaliseLiteralMask(id.s, msk, cmp); - - if (msk.empty()) { - DEBUG_PRINTF("no mask\n"); - return false; - } - - DEBUG_PRINTF("msk=%s, cmp=%s\n", dumpMask(msk).c_str(), - dumpMask(cmp).c_str()); - return true; -} - -static -bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, - const rose_literal_info &info) { - if (!build.isDirectReport(id)) { - return false; - } - - auto is_simple_exhaustible = [&build](ReportID id) { - const Report &report = build.rm.getReport(id); - return isSimpleExhaustible(report); - }; - - assert(!info.vertices.empty()); - for (const auto &v : info.vertices) { - const auto &reports = build.g[v].reports; - assert(!reports.empty()); - if (!all_of(begin(reports), end(reports), - is_simple_exhaustible)) { - return false; - } - } - return true; -} - -// Called by isNoRunsLiteral below. -static -bool isNoRunsVertex(const RoseBuildImpl &tbi, NFAVertex u) { - const RoseGraph &g = tbi.g; - if (!g[u].isBoring()) { - DEBUG_PRINTF("u=%zu is not boring\n", g[u].idx); - return false; - } - - if (!g[u].reports.empty()) { - DEBUG_PRINTF("u=%zu has accept\n", g[u].idx); - return false; - } - - /* TODO: handle non-root roles as well. It can't be that difficult... */ - - if (!in_degree_equal_to(u, g, 1)) { - DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx); - return false; - } - - RoseEdge e; - bool exists; - tie(e, exists) = edge_by_target(tbi.root, u, g); - - if (!exists) { - DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx); - return false; - } - - if (g[e].minBound != 0 || g[e].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("u=%zu has bounds from root\n", g[u].idx); - return false; - } - - for (const auto &oe : out_edges_range(u, g)) { - RoseVertex v = target(oe, g); - if (g[oe].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("edge (%zu,%zu) has max bound\n", g[u].idx, - g[target(oe, g)].idx); - return false; - } - if (g[v].left) { - DEBUG_PRINTF("v=%zu has rose prefix\n", g[v].idx); - return false; - } - } - return true; -} - -static -bool isNoRunsLiteral(const RoseBuildImpl &tbi, const u32 id, - const rose_literal_info &info) { - DEBUG_PRINTF("lit id %u\n", id); - - if (info.requires_benefits) { - DEBUG_PRINTF("requires benefits\n"); // which would need confirm - return false; - } - - if (isDirectHighlander(tbi, id, info)) { - DEBUG_PRINTF("highlander direct report\n"); - return true; - } - - // Undelayed vertices. - for (RoseVertex v : info.vertices) { - if (!isNoRunsVertex(tbi, v)) { - return false; - } - } - - // Delayed vertices. 
- for (u32 d : info.delayed_ids) { - assert(d < tbi.literal_info.size()); - const rose_literal_info &delayed_info = tbi.literal_info.at(d); - assert(delayed_info.undelayed_id == id); - for (RoseVertex v : delayed_info.vertices) { - if (!isNoRunsVertex(tbi, v)) { - return false; - } - } - } - - DEBUG_PRINTF("is no-runs literal\n"); - return true; -} - -void fillHamsterLiteralList(const RoseBuildImpl &tbi, rose_literal_table table, - vector *hl) { - for (const auto &e : tbi.literals.right) { - const u32 id = e.first; - if (!tbi.hasFinalId(id)) { - continue; - } - - if (e.second.delay) { - continue; /* delay id's are virtual-ish */ - } - - if (e.second.table != table) { - continue; /* wrong table */ - } - - assert(id < tbi.literal_info.size()); - const rose_literal_info &info = tbi.literal_info[id]; - u32 final_id = info.final_id; - rose_group groups = info.group_mask; - /* Note: requires_benefits are handled in the literal entries */ - const ue2_literal &lit = e.second.s; - - DEBUG_PRINTF("lit='%s'\n", escapeString(lit).c_str()); - - vector msk = e.second.msk; // copy - vector cmp = e.second.cmp; // copy - - if (msk.empty()) { - // Try and pick up an advisory mask. - if (!findHamsterMask(tbi, e.second, info, msk, cmp)) { - msk.clear(); cmp.clear(); - } else { - DEBUG_PRINTF("picked up late mask %zu\n", msk.size()); - } - } - - bool noruns = isNoRunsLiteral(tbi, id, info); - - if (info.requires_explode) { - DEBUG_PRINTF("exploding lit\n"); - const vector empty_msk; // msk/cmp will be empty - case_iter cit = caseIterateBegin(lit); - case_iter cite = caseIterateEnd(); - for (; cit != cite; ++cit) { - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d msk=%s, " - "cmp=%s (exploded)\n", - final_id, escapeString(lit.get_string()).c_str(), - 0, noruns, dumpMask(msk).c_str(), - dumpMask(cmp).c_str()); - hl->push_back(hwlmLiteral(*cit, false, noruns, final_id, groups, - empty_msk, empty_msk)); - } - } else { - const std::string &s = lit.get_string(); - const bool nocase = lit.any_nocase(); - - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " - "cmp=%s\n", - final_id, escapeString(s).c_str(), (int)nocase, noruns, - dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - - if (!maskIsConsistent(s, nocase, msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); - continue; - } - - hl->push_back(hwlmLiteral(lit.get_string(), lit.any_nocase(), - noruns, final_id, groups, msk, cmp)); - } - } -} - -static -aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &tbi, - size_t *fsize, - size_t *historyRequired, - size_t *streamStateRequired) { - *fsize = 0; - - vector fl; - fl.reserve(tbi.literals.size()); - fillHamsterLiteralList(tbi, ROSE_FLOATING, &fl); - if (fl.empty()) { - DEBUG_PRINTF("empty floating matcher\n"); - return nullptr; - } - - hwlmStreamingControl ctl; - hwlmStreamingControl *ctlp; - if (tbi.cc.streaming) { - ctl.history_max = tbi.cc.grey.maxHistoryAvailable; - ctl.history_min = MAX(*historyRequired, - tbi.cc.grey.minHistoryAvailable); - DEBUG_PRINTF("streaming control, history max=%zu, min=%zu\n", - ctl.history_max, ctl.history_min); - ctlp = &ctl; - } else { - ctlp = nullptr; // Null for non-streaming. 
- } - - aligned_unique_ptr ftable = - hwlmBuild(fl, ctlp, false, tbi.cc, tbi.getInitialGroups()); - if (!ftable) { - throw CompileError("Unable to generate bytecode."); - } - - if (tbi.cc.streaming) { - DEBUG_PRINTF("literal_history_required=%zu\n", - ctl.literal_history_required); - DEBUG_PRINTF("literal_stream_state_required=%zu\n", - ctl.literal_stream_state_required); - assert(ctl.literal_history_required <= tbi.cc.grey.maxHistoryAvailable); - *historyRequired = max(*historyRequired, - ctl.literal_history_required); - *streamStateRequired = ctl.literal_stream_state_required; - } - - *fsize = hwlmSize(ftable.get()); - assert(*fsize); - DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize); - return ftable; -} - -namespace { -struct LongerThanLimit { - explicit LongerThanLimit(size_t len) : max_len(len) {} - bool operator()(const hwlmLiteral &lit) const { - return lit.s.length() > max_len; - } -private: - size_t max_len; -}; -} - -static -aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &tbi, - size_t *sbsize) { - *sbsize = 0; - - if (tbi.cc.streaming) { - DEBUG_PRINTF("streaming mode\n"); - return nullptr; - } - - u32 float_min = findMinWidth(tbi, ROSE_FLOATING); - if (float_min > ROSE_SMALL_BLOCK_LEN) { - DEBUG_PRINTF("floating table has large min width %u, fail\n", float_min); - return nullptr; - } - - vector lits; - fillHamsterLiteralList(tbi, ROSE_FLOATING, &lits); - if (lits.empty()) { - DEBUG_PRINTF("no floating table\n"); - return nullptr; - } else if (lits.size() == 1) { - DEBUG_PRINTF("single floating literal, noodle will be fast enough\n"); - return nullptr; - } - - vector anchored_lits; - fillHamsterLiteralList(tbi, ROSE_ANCHORED_SMALL_BLOCK, &anchored_lits); - if (anchored_lits.empty()) { - DEBUG_PRINTF("no small-block anchored literals\n"); - return nullptr; - } - - lits.insert(lits.end(), anchored_lits.begin(), anchored_lits.end()); - - // Remove literals that are longer than our small block length, as they can - // never match. TODO: improve by removing literals that have a min match - // offset greater than ROSE_SMALL_BLOCK_LEN, which will catch anchored cases - // with preceding dots that put them over the limit. 
- lits.erase(std::remove_if(lits.begin(), lits.end(), - LongerThanLimit(ROSE_SMALL_BLOCK_LEN)), - lits.end()); - - if (lits.empty()) { - DEBUG_PRINTF("no literals shorter than small block len\n"); - return nullptr; - } - - aligned_unique_ptr hwlm = - hwlmBuild(lits, nullptr, true, tbi.cc, tbi.getInitialGroups()); - if (!hwlm) { - throw CompileError("Unable to generate bytecode."); - } - - *sbsize = hwlmSize(hwlm.get()); - assert(*sbsize); - DEBUG_PRINTF("built small block literal table size %zu bytes\n", *sbsize); - return hwlm; -} - -static -aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &tbi, - size_t *esize) { - *esize = 0; - - vector el; - fillHamsterLiteralList(tbi, ROSE_EOD_ANCHORED, &el); - - if (el.empty()) { - DEBUG_PRINTF("no eod anchored literals\n"); - assert(!tbi.ematcher_region_size); - return nullptr; - } - - assert(tbi.ematcher_region_size); - - hwlmStreamingControl *ctlp = nullptr; // not a streaming case - aligned_unique_ptr etable = - hwlmBuild(el, ctlp, true, tbi.cc, tbi.getInitialGroups()); - if (!etable) { - throw CompileError("Unable to generate bytecode."); - } - - *esize = hwlmSize(etable.get()); - assert(*esize); - DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize); - return etable; -} - // Adds a sparse iterator to the end of the iterator table, returning its // offset. static diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index cd32749eb..e56f322b3 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,6 +32,7 @@ #include "hwlm/hwlm_build.h" #include "rose_build_impl.h" +#include "rose_build_matchers.h" #include "rose/rose_dump.h" #include "rose_internal.h" #include "ue2common.h" @@ -458,23 +459,18 @@ struct LongerThanLimit { static void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { - - vector lits; - - fillHamsterLiteralList(build, ROSE_ANCHORED, &lits); + auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED); dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits); - lits.clear(); - fillHamsterLiteralList(build, ROSE_FLOATING, &lits); + lits = fillHamsterLiteralList(build, ROSE_FLOATING); dumpTestLiterals(base + "rose_float_test_literals.txt", lits); - lits.clear(); - fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, &lits); + lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED); dumpTestLiterals(base + "rose_eod_test_literals.txt", lits); - lits.clear(); - fillHamsterLiteralList(build, ROSE_FLOATING, &lits); - fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, &lits); + lits = fillHamsterLiteralList(build, ROSE_FLOATING); + auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK); + lits.insert(end(lits), begin(lits2), end(lits2)); lits.erase(remove_if(lits.begin(), lits.end(), LongerThanLimit(ROSE_SMALL_BLOCK_LEN)), lits.end()); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 592476459..7b59c59e9 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -58,7 +58,6 @@ namespace ue2 { struct BoundaryReports; struct CastleProto; struct CompileContext; -struct hwlmLiteral; class ReportManager; class SomSlotManager; @@ -550,9 +549,6 @@ u64a findMaxOffset(const std::set &reports, const ReportManager &rm); void 
normaliseLiteralMask(const ue2_literal &s, std::vector &msk, std::vector &cmp); -void fillHamsterLiteralList(const RoseBuildImpl &tbi, rose_literal_table table, - std::vector *hl); - #ifndef NDEBUG bool canImplementGraphs(const RoseBuildImpl &tbi); #endif diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp new file mode 100644 index 000000000..83c495560 --- /dev/null +++ b/src/rose/rose_build_matchers.cpp @@ -0,0 +1,662 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose build: code for constructing literal tables. + */ + +#include "rose_build_matchers.h" + +#include "rose_build_impl.h" +#include "rose_build_width.h" +#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_literal.h" +#include "nfa/castlecompile.h" +#include "util/charreach_util.h" +#include "util/compile_context.h" +#include "util/compile_error.h" +#include "util/dump_charclass.h" +#include "util/report.h" +#include "util/report_manager.h" +#include "ue2common.h" + +#include +#include + +#include + +using namespace std; +using boost::adaptors::map_values; + +namespace ue2 { + +#ifdef DEBUG +static UNUSED +string dumpMask(const vector &v) { + ostringstream oss; + for (u8 e : v) { + oss << setfill('0') << setw(2) << hex << (unsigned int)e; + } + return oss.str(); +} +#endif + +static +bool maskFromLeftGraph(const LeftEngInfo &left, vector &msk, + vector &cmp) { + const u32 lag = left.lag; + const ReportID report = left.leftfix_report; + + DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report); + + assert(left.graph); + const NGHolder &h = *left.graph; + assert(in_degree(h.acceptEod, h) == 1); // no eod reports + + // Start with the set of reporter vertices for this leftfix. 
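+    // From these we walk backwards one byte per iteration, ORing each
+    // step's reachability into msk/cmp, until the mask is exhausted or
+    // we reach the start vertices.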
+ set curr; + for (auto u : inv_adjacent_vertices_range(h.accept, h)) { + if (contains(h[u].reports, report)) { + curr.insert(u); + } + } + assert(!curr.empty()); + + size_t i = HWLM_MASKLEN - lag - 1; + do { + if (curr.empty() || contains(curr, h.start) + || contains(curr, h.startDs)) { + DEBUG_PRINTF("end of the road\n"); + break; + } + + set next; + CharReach cr; + for (NFAVertex v : curr) { + const auto &v_cr = h[v].char_reach; + DEBUG_PRINTF("vertex %u, reach %s\n", h[v].index, + describeClass(v_cr).c_str()); + cr |= v_cr; + insert(&next, inv_adjacent_vertices(v, h)); + } + make_and_cmp_mask(cr, &msk.at(i), &cmp.at(i)); + DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i, + describeClass(cr).c_str(), msk[i], cmp[i]); + curr.swap(next); + } while (i-- > 0); + + return true; +} + +static +bool maskFromLeftCastle(const LeftEngInfo &left, vector &msk, + vector &cmp) { + const u32 lag = left.lag; + const ReportID report = left.leftfix_report; + + DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report); + + assert(left.castle); + const CastleProto &c = *left.castle; + + depth min_width(depth::infinity()); + for (const PureRepeat &repeat : c.repeats | map_values) { + if (contains(repeat.reports, report)) { + min_width = min(min_width, repeat.bounds.min); + } + } + + DEBUG_PRINTF("castle min width for this report is %s\n", + min_width.str().c_str()); + + if (!min_width.is_finite() || min_width == depth(0)) { + DEBUG_PRINTF("bad min width\n"); + return false; + } + + u32 len = min_width; + u32 end = HWLM_MASKLEN - lag; + for (u32 i = end; i > end - min(end, len); i--) { + make_and_cmp_mask(c.reach(), &msk.at(i - 1), &cmp.at(i - 1)); + } + + return true; +} + +static +bool maskFromLeft(const LeftEngInfo &left, vector &msk, vector &cmp) { + if (left.lag >= HWLM_MASKLEN) { + DEBUG_PRINTF("too much lag\n"); + return false; + } + + if (left.graph) { + return maskFromLeftGraph(left, msk, cmp); + } else if (left.castle) { + return maskFromLeftCastle(left, msk, cmp); + } + + return false; +} + +static +bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id, + const RoseVertex v, vector &msk, vector &cmp) { + const RoseGraph &g = build.g; + + // For right now, wuss out and only handle cases with one pred. + if (in_degree(v, g) != 1) { + return false; + } + + // Root successors have no literal before them. + if (build.isRootSuccessor(v)) { + return false; + } + + // If we have a single predecessor with a short bound, we may be able to + // fill out a mask with the trailing bytes of the previous literal. This + // allows us to improve literals like the 'bar' in 'fo.bar'. + + RoseEdge e = *(in_edges(v, g).first); + u32 bound = g[e].maxBound; + if (bound != g[e].minBound || bound >= HWLM_MASKLEN) { + return false; + } + + bound += id.s.length(); + if (bound >= HWLM_MASKLEN) { + return false; + } + + DEBUG_PRINTF("bound %u\n", bound); + + RoseVertex u = source(e, g); + if (g[u].literals.size() != 1) { + DEBUG_PRINTF("u has %zu literals\n", g[u].literals.size()); + return false; + } + + u32 u_lit_id = *(g[u].literals.begin()); + const rose_literal_id &u_id = build.literals.right.at(u_lit_id); + DEBUG_PRINTF("u has lit: %s\n", escapeString(u_id.s).c_str()); + + // Number of characters to take from the back of u's literal. 
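+    // (u's tail ends `bound` bytes, i.e. the edge gap plus this
+    // literal's length, before the end of the mask window.)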
+ size_t u_len = u_id.s.length(); + size_t u_sublen = min(u_len, (size_t)HWLM_MASKLEN - bound); + + size_t i = HWLM_MASKLEN - (bound + u_sublen); + + ue2_literal::const_iterator it, ite; + for (it = u_id.s.begin() + (u_len - u_sublen), ite = u_id.s.end(); + it != ite; ++it) { + make_and_cmp_mask(*it, &msk.at(i), &cmp.at(i)); + ++i; + } + + return true; +} + +static +bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id, + const rose_literal_info &info, const RoseVertex v, + vector &msk, vector &cmp) { + // Start with zero masks. + msk.assign(HWLM_MASKLEN, 0); + cmp.assign(HWLM_MASKLEN, 0); + + // Masks can come from literal benefits (for mixed-case literals). + if (info.requires_benefits) { + assert(mixed_sensitivity(id.s)); + + size_t j = 0; + for (ue2_literal::const_reverse_iterator it = id.s.rbegin(), + ite = id.s.rend(); + it != ite && j < HWLM_MASKLEN; ++it, ++j) { + size_t offset = HWLM_MASKLEN - j - 1; + const CharReach &cr = *it; + make_and_cmp_mask(cr, &msk[offset], &cmp[offset]); + } + return true; + } + + const LeftEngInfo &left = build.g[v].left; + if (left && left.lag < HWLM_MASKLEN) { + if (maskFromLeft(left, msk, cmp)) { + DEBUG_PRINTF("mask from a leftfix!\n"); + return true; + } + } + + if (id.s.length() < HWLM_MASKLEN) { + if (maskFromPreds(build, id, v, msk, cmp)) { + DEBUG_PRINTF("mask from preds!\n"); + return true; + } + } + + return false; +} + +static +bool hamsterMaskCombine(vector &msk, vector &cmp, + const vector &v_msk, const vector &v_cmp) { + assert(msk.size() == HWLM_MASKLEN && cmp.size() == HWLM_MASKLEN); + assert(v_msk.size() == HWLM_MASKLEN && v_cmp.size() == HWLM_MASKLEN); + + u8 all_masks = 0; + + for (size_t i = 0; i < HWLM_MASKLEN; i++) { + u8 filter = ~(cmp[i] ^ v_cmp[i]); + msk[i] &= v_msk[i]; + msk[i] &= filter; + cmp[i] &= filter; + + all_masks |= msk[i]; + } + + // Return false if we have no bits on in any mask elements. + return all_masks != 0; +} + +static +bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id, + const rose_literal_info &info, + vector &msk, vector &cmp) { + if (!build.cc.grey.roseHamsterMasks) { + return false; + } + + if (!info.delayed_ids.empty()) { + // Not safe to add masks to delayed literals at this late stage. + return false; + } + + size_t num = 0; + vector v_msk, v_cmp; + + for (RoseVertex v : info.vertices) { + if (!findHamsterMask(build, id, info, v, v_msk, v_cmp)) { + DEBUG_PRINTF("no mask\n"); + return false; + } + + if (!num++) { + // First (or only) vertex, this becomes the mask/cmp pair. + msk = v_msk; + cmp = v_cmp; + } else { + // Multiple vertices with potentially different masks. We combine + // them into an 'advisory' mask. 
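+            // hamsterMaskCombine keeps only the bits present in both
+            // masks whose compare values agree; give up if nothing
+            // survives.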
+ if (!hamsterMaskCombine(msk, cmp, v_msk, v_cmp)) { + DEBUG_PRINTF("mask went to zero\n"); + return false; + } + } + } + + normaliseLiteralMask(id.s, msk, cmp); + + if (msk.empty()) { + DEBUG_PRINTF("no mask\n"); + return false; + } + + DEBUG_PRINTF("msk=%s, cmp=%s\n", dumpMask(msk).c_str(), + dumpMask(cmp).c_str()); + return true; +} + +static +bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, + const rose_literal_info &info) { + if (!build.isDirectReport(id)) { + return false; + } + + auto is_simple_exhaustible = [&build](ReportID id) { + const Report &report = build.rm.getReport(id); + return isSimpleExhaustible(report); + }; + + assert(!info.vertices.empty()); + for (const auto &v : info.vertices) { + const auto &reports = build.g[v].reports; + assert(!reports.empty()); + if (!all_of(begin(reports), end(reports), + is_simple_exhaustible)) { + return false; + } + } + return true; +} + +// Called by isNoRunsLiteral below. +static +bool isNoRunsVertex(const RoseBuildImpl &build, NFAVertex u) { + const RoseGraph &g = build.g; + if (!g[u].isBoring()) { + DEBUG_PRINTF("u=%zu is not boring\n", g[u].idx); + return false; + } + + if (!g[u].reports.empty()) { + DEBUG_PRINTF("u=%zu has accept\n", g[u].idx); + return false; + } + + /* TODO: handle non-root roles as well. It can't be that difficult... */ + + if (!in_degree_equal_to(u, g, 1)) { + DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx); + return false; + } + + RoseEdge e; + bool exists; + tie(e, exists) = edge_by_target(build.root, u, g); + + if (!exists) { + DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx); + return false; + } + + if (g[e].minBound != 0 || g[e].maxBound != ROSE_BOUND_INF) { + DEBUG_PRINTF("u=%zu has bounds from root\n", g[u].idx); + return false; + } + + for (const auto &oe : out_edges_range(u, g)) { + RoseVertex v = target(oe, g); + if (g[oe].maxBound != ROSE_BOUND_INF) { + DEBUG_PRINTF("edge (%zu,%zu) has max bound\n", g[u].idx, + g[target(oe, g)].idx); + return false; + } + if (g[v].left) { + DEBUG_PRINTF("v=%zu has rose prefix\n", g[v].idx); + return false; + } + } + return true; +} + +static +bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, + const rose_literal_info &info) { + DEBUG_PRINTF("lit id %u\n", id); + + if (info.requires_benefits) { + DEBUG_PRINTF("requires benefits\n"); // which would need confirm + return false; + } + + if (isDirectHighlander(build, id, info)) { + DEBUG_PRINTF("highlander direct report\n"); + return true; + } + + // Undelayed vertices. + for (RoseVertex v : info.vertices) { + if (!isNoRunsVertex(build, v)) { + return false; + } + } + + // Delayed vertices. 
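+    // Each delayed variant has its own literal_info entry; its vertices
+    // must pass the same test.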
+ for (u32 d : info.delayed_ids) { + assert(d < build.literal_info.size()); + const rose_literal_info &delayed_info = build.literal_info.at(d); + assert(delayed_info.undelayed_id == id); + for (RoseVertex v : delayed_info.vertices) { + if (!isNoRunsVertex(build, v)) { + return false; + } + } + } + + DEBUG_PRINTF("is no-runs literal\n"); + return true; +} + +vector fillHamsterLiteralList(const RoseBuildImpl &build, + rose_literal_table table) { + vector lits; + + for (const auto &e : build.literals.right) { + const u32 id = e.first; + if (!build.hasFinalId(id)) { + continue; + } + + if (e.second.delay) { + continue; /* delay id's are virtual-ish */ + } + + if (e.second.table != table) { + continue; /* wrong table */ + } + + assert(id < build.literal_info.size()); + const rose_literal_info &info = build.literal_info[id]; + u32 final_id = info.final_id; + rose_group groups = info.group_mask; + /* Note: requires_benefits are handled in the literal entries */ + const ue2_literal &lit = e.second.s; + + DEBUG_PRINTF("lit='%s'\n", escapeString(lit).c_str()); + + vector msk = e.second.msk; // copy + vector cmp = e.second.cmp; // copy + + if (msk.empty()) { + // Try and pick up an advisory mask. + if (!findHamsterMask(build, e.second, info, msk, cmp)) { + msk.clear(); cmp.clear(); + } else { + DEBUG_PRINTF("picked up late mask %zu\n", msk.size()); + } + } + + bool noruns = isNoRunsLiteral(build, id, info); + + if (info.requires_explode) { + DEBUG_PRINTF("exploding lit\n"); + const vector empty_msk; // msk/cmp will be empty + case_iter cit = caseIterateBegin(lit); + case_iter cite = caseIterateEnd(); + for (; cit != cite; ++cit) { + DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d msk=%s, " + "cmp=%s (exploded)\n", + final_id, escapeString(lit.get_string()).c_str(), + 0, noruns, dumpMask(msk).c_str(), + dumpMask(cmp).c_str()); + lits.emplace_back(*cit, false, noruns, final_id, groups, + empty_msk, empty_msk); + } + } else { + const std::string &s = lit.get_string(); + const bool nocase = lit.any_nocase(); + + DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " + "cmp=%s\n", + final_id, escapeString(s).c_str(), (int)nocase, noruns, + dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + + if (!maskIsConsistent(s, nocase, msk, cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + continue; + } + + lits.emplace_back(lit.get_string(), lit.any_nocase(), noruns, + final_id, groups, msk, cmp); + } + } + + return lits; +} + +aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + size_t *fsize, + size_t *historyRequired, + size_t *streamStateRequired) { + *fsize = 0; + + auto fl = fillHamsterLiteralList(build, ROSE_FLOATING); + if (fl.empty()) { + DEBUG_PRINTF("empty floating matcher\n"); + return nullptr; + } + + hwlmStreamingControl ctl; + hwlmStreamingControl *ctlp; + if (build.cc.streaming) { + ctl.history_max = build.cc.grey.maxHistoryAvailable; + ctl.history_min = MAX(*historyRequired, + build.cc.grey.minHistoryAvailable); + DEBUG_PRINTF("streaming control, history max=%zu, min=%zu\n", + ctl.history_max, ctl.history_min); + ctlp = &ctl; + } else { + ctlp = nullptr; // Null for non-streaming. 
+ } + + aligned_unique_ptr ftable = + hwlmBuild(fl, ctlp, false, build.cc, build.getInitialGroups()); + if (!ftable) { + throw CompileError("Unable to generate bytecode."); + } + + if (build.cc.streaming) { + DEBUG_PRINTF("literal_history_required=%zu\n", + ctl.literal_history_required); + DEBUG_PRINTF("literal_stream_state_required=%zu\n", + ctl.literal_stream_state_required); + assert(ctl.literal_history_required <= + build.cc.grey.maxHistoryAvailable); + *historyRequired = max(*historyRequired, + ctl.literal_history_required); + *streamStateRequired = ctl.literal_stream_state_required; + } + + *fsize = hwlmSize(ftable.get()); + assert(*fsize); + DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize); + return ftable; +} + +aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, + size_t *sbsize) { + *sbsize = 0; + + if (build.cc.streaming) { + DEBUG_PRINTF("streaming mode\n"); + return nullptr; + } + + u32 float_min = findMinWidth(build, ROSE_FLOATING); + if (float_min > ROSE_SMALL_BLOCK_LEN) { + DEBUG_PRINTF("floating table has large min width %u, fail\n", + float_min); + return nullptr; + } + + auto lits = fillHamsterLiteralList(build, ROSE_FLOATING); + if (lits.empty()) { + DEBUG_PRINTF("no floating table\n"); + return nullptr; + } else if (lits.size() == 1) { + DEBUG_PRINTF("single floating literal, noodle will be fast enough\n"); + return nullptr; + } + + auto anchored_lits = + fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK); + if (anchored_lits.empty()) { + DEBUG_PRINTF("no small-block anchored literals\n"); + return nullptr; + } + + lits.insert(lits.end(), anchored_lits.begin(), anchored_lits.end()); + + // Remove literals that are longer than our small block length, as they can + // never match. TODO: improve by removing literals that have a min match + // offset greater than ROSE_SMALL_BLOCK_LEN, which will catch anchored cases + // with preceding dots that put them over the limit. 
+ auto longer_than_limit = [](const hwlmLiteral &lit) { + return lit.s.length() > ROSE_SMALL_BLOCK_LEN; + }; + lits.erase(remove_if(lits.begin(), lits.end(), longer_than_limit), + lits.end()); + + if (lits.empty()) { + DEBUG_PRINTF("no literals shorter than small block len\n"); + return nullptr; + } + + aligned_unique_ptr hwlm = + hwlmBuild(lits, nullptr, true, build.cc, build.getInitialGroups()); + if (!hwlm) { + throw CompileError("Unable to generate bytecode."); + } + + *sbsize = hwlmSize(hwlm.get()); + assert(*sbsize); + DEBUG_PRINTF("built small block literal table size %zu bytes\n", *sbsize); + return hwlm; +} + +aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, + size_t *esize) { + *esize = 0; + + auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED); + + if (el.empty()) { + DEBUG_PRINTF("no eod anchored literals\n"); + assert(!build.ematcher_region_size); + return nullptr; + } + + assert(build.ematcher_region_size); + + hwlmStreamingControl *ctlp = nullptr; // not a streaming case + aligned_unique_ptr etable = + hwlmBuild(el, ctlp, true, build.cc, build.getInitialGroups()); + if (!etable) { + throw CompileError("Unable to generate bytecode."); + } + + *esize = hwlmSize(etable.get()); + assert(*esize); + DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize); + return etable; +} + +} // namespace ue2 diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h new file mode 100644 index 000000000..9781f5141 --- /dev/null +++ b/src/rose/rose_build_matchers.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose build: code for constructing literal tables. 
+ */ + +#ifndef ROSE_BUILD_MATCHERS_H +#define ROSE_BUILD_MATCHERS_H + +#include "rose_build_impl.h" + +#include + +struct HWLM; + +namespace ue2 { + +struct hwlmLiteral; + +std::vector fillHamsterLiteralList(const RoseBuildImpl &build, + rose_literal_table table); + +aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + size_t *fsize, + size_t *historyRequired, + size_t *streamStateRequired); + +aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, + size_t *sbsize); + +aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, + size_t *esize); + +} // namespace ue2 + +#endif // ROSE_BUILD_MATCHERS_H From f7a40b200def4a5ffd63fca7c799dc5fedf63abb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 3 Mar 2016 13:44:54 +1100 Subject: [PATCH 107/218] Rose: remove dead function roseHandleMatch --- src/rose/match.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/rose/match.c b/src/rose/match.c index f62a58242..57a1688c8 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -222,29 +222,6 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } -/* handles the firing of external matches */ -static rose_inline -hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, ReportID id, u64a end, - struct hs_scratch *scratch) { - struct RoseContext *tctxt = &scratch->tctxt; - - assert(!t->needsCatchup || end == tctxt->minMatchOffset); - DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end); - updateLastMatchOffset(tctxt, end); - - int cb_rv = tctxt->cb(end, id, scratch); - if (cb_rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } - - return roseHaltIfExhausted(t, scratch); -} - int roseAnchoredCallback(u64a end, u32 id, void *ctx) { struct RoseContext *tctxt = ctx; struct hs_scratch *scratch = tctxtToScratch(tctxt); From 6af1ead0aac9844154c7e629e14a030dc826221d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 3 Mar 2016 14:14:30 +1100 Subject: [PATCH 108/218] Rose: make HWLM callbacks take scratch as ctx --- src/rose/block.c | 8 ++++---- src/rose/eod.c | 3 ++- src/rose/match.c | 8 ++++---- src/rose/stream.c | 8 ++++---- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/rose/block.c b/src/rose/block.c index 853f1ead5..749bf4e2f 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -61,11 +61,11 @@ void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable, if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_B(nfa, curr->anchoredMinDistance, local_buffer, local_alen, - roseAnchoredCallback, &scratch->tctxt); + roseAnchoredCallback, scratch); } else { nfaExecMcClellan16_B(nfa, curr->anchoredMinDistance, local_buffer, local_alen, - roseAnchoredCallback, &scratch->tctxt); + roseAnchoredCallback, scratch); } } @@ -193,7 +193,7 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, DEBUG_PRINTF("BEGIN SMALL BLOCK (over %zu/%zu)\n", sblen, length); DEBUG_PRINTF("-- %016llx\n", tctxt->groups); hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback, - tctxt, tctxt->groups); + scratch, tctxt->groups); goto exit; } @@ -249,7 +249,7 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); DEBUG_PRINTF("-- %016llx\n", tctxt->groups); hwlmExec(ftable, buffer, flen, t->floatingMinDistance, 
- roseCallback, tctxt, tctxt->groups); + roseCallback, scratch, tctxt->groups); } exit:; diff --git a/src/rose/eod.c b/src/rose/eod.c index 24e9113d7..449f26f49 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -93,7 +93,8 @@ hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset, struct RoseContext *tctxt = &scratch->tctxt; const struct HWLM *etable = getELiteralMatcher(t); - hwlmExec(etable, eod_data, eod_len, adj, roseCallback, tctxt, tctxt->groups); + hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch, + tctxt->groups); // We may need to fire delayed matches return cleanUpDelayed(t, scratch, 0, offset); diff --git a/src/rose/match.c b/src/rose/match.c index 57a1688c8..b2b0238de 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -223,8 +223,8 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, } int roseAnchoredCallback(u64a end, u32 id, void *ctx) { - struct RoseContext *tctxt = ctx; - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch = ctx; + struct RoseContext *tctxt = &scratch->tctxt; struct core_info *ci = &scratch->core_info; const struct RoseEngine *t = ci->rose; @@ -507,8 +507,8 @@ anchored_leftovers:; } hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { - struct RoseContext *tctx = ctxt; - struct hs_scratch *scratch = tctxtToScratch(tctx); + struct hs_scratch *scratch = ctxt; + struct RoseContext *tctx = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; u64a real_end = end + tctx->lit_offset_adjust; diff --git a/src/rose/stream.c b/src/rose/stream.c index 759f75532..31a0227f5 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -83,11 +83,11 @@ void runAnchoredTableStream(const struct RoseEngine *t, const void *atable, if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_SimpStream(nfa, state, scratch->core_info.buf, start, adj, alen, roseAnchoredCallback, - &scratch->tctxt); + scratch); } else { nfaExecMcClellan16_SimpStream(nfa, state, scratch->core_info.buf, - start, adj, alen, roseAnchoredCallback, - &scratch->tctxt); + start, adj, alen, + roseAnchoredCallback, scratch); } next_nfa: @@ -542,7 +542,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, } DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); - hwlmExecStreaming(ftable, scratch, flen, start, roseCallback, tctxt, + hwlmExecStreaming(ftable, scratch, flen, start, roseCallback, scratch, tctxt->groups, stream_state); } From 24ccefa3d310bfa0672694be94f0538f751118cb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 3 Mar 2016 14:23:08 +1100 Subject: [PATCH 109/218] Rose: make all NFA callbacks take scratch as ctx --- src/rose/catchup.c | 42 ++++++++++++++++++++++-------------------- src/rose/match.h | 2 +- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/rose/catchup.c b/src/rose/catchup.c index c740fe087..40327350c 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -272,8 +272,8 @@ hwlmcb_rv_t runNewNfaToNextMatch(const struct RoseEngine *t, u32 qi, /* for use by mpv (chained) only */ static UNUSED int roseNfaFinalBlastAdaptor(u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("called\n"); @@ -302,8 +302,8 @@ int roseNfaFinalBlastAdaptor(u64a offset, ReportID id, void *context) { static 
UNUSED int roseNfaFinalBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("called\n"); @@ -468,8 +468,8 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, static UNUSED int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; const struct internal_report *ri = getInternalReport(t, id); @@ -507,8 +507,8 @@ int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { static UNUSED int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("called\n"); @@ -536,8 +536,8 @@ int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { static UNUSED int roseNfaBlastAdaptorNoChain(u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", @@ -564,8 +564,8 @@ int roseNfaBlastAdaptorNoChain(u64a offset, ReportID id, void *context) { static UNUSED int roseNfaBlastAdaptorNoInternalNoChain(u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; /* chained nfas are run under the control of the anchored catchup */ @@ -589,8 +589,8 @@ int roseNfaBlastAdaptorNoInternalNoChain(u64a offset, ReportID id, static UNUSED int roseNfaBlastSomAdaptor(u64a from_offset, u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; - struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; DEBUG_PRINTF("called\n"); @@ -618,12 +618,12 @@ int roseNfaBlastSomAdaptor(u64a from_offset, u64a offset, ReportID id, } int roseNfaAdaptor(u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = &scratch->tctxt; DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); updateLastMatchOffset(tctxt, offset); - struct hs_scratch *scratch = tctxtToScratch(tctxt); const struct RoseEngine *t = scratch->core_info.rose; if (handleReportInternally(t, scratch, id, offset)) { return MO_CONTINUE_MATCHING; @@ -633,21 +633,23 @@ int roseNfaAdaptor(u64a offset, ReportID id, void *context) { } int roseNfaAdaptorNoInternal(u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = 
&scratch->tctxt; DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); updateLastMatchOffset(tctxt, offset); - return tctxt->cb(offset, id, tctxtToScratch(tctxt)); + return tctxt->cb(offset, id, scratch); } int roseNfaSomAdaptor(u64a from_offset, u64a offset, ReportID id, void *context) { - struct RoseContext *tctxt = context; + struct hs_scratch *scratch = context; + struct RoseContext *tctxt = &scratch->tctxt; DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); updateLastMatchOffset(tctxt, offset); /* must be a external report as haig cannot directly participate in chain */ - return tctxt->cb_som(from_offset, offset, id, tctxtToScratch(tctxt)); + return tctxt->cb_som(from_offset, offset, id, scratch); } static really_inline diff --git a/src/rose/match.h b/src/rose/match.h index f98891395..d62fe553e 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -84,7 +84,7 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, q->cb = roseNfaAdaptor; } q->som_cb = roseNfaSomAdaptor; - q->context = &scratch->tctxt; + q->context = scratch; q->report_current = 0; DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, " From 93a262e54c4e89c4a898acbea7b3eb63996b4ef8 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 3 Mar 2016 15:36:10 +1100 Subject: [PATCH 110/218] Rose: use program for boundary reports Use the program to handle report lists at boundaries, rather than the previous list-of-reports approach. --- src/rose/match.c | 36 ++++++++++ src/rose/rose.h | 3 + src/rose/rose_build_bytecode.cpp | 111 ++++++++++++------------------- src/rose/rose_internal.h | 19 +++--- src/runtime.c | 85 ++++++----------------- 5 files changed, 112 insertions(+), 142 deletions(-) diff --git a/src/rose/match.c b/src/rose/match.c index b2b0238de..9163cba3a 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -573,3 +573,39 @@ hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, assert(id < rose->literalCount); return roseRunProgram(rose, scratch, programs[id], real_end, match_len, 0); } + +/** + * \brief Execute a boundary report program. + * + * Returns MO_HALT_MATCHING if the stream is exhausted or the user has + * instructed us to halt, or MO_CONTINUE_MATCHING otherwise. + */ +int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, + u64a stream_offset, struct hs_scratch *scratch) { + DEBUG_PRINTF("running boundary program at offset %u\n", program); + + if (can_stop_matching(scratch)) { + DEBUG_PRINTF("can stop matching\n"); + return MO_HALT_MATCHING; + } + + if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) { + /* we cannot delay the initialization of the som deduper logs any longer + * as we are reporting matches. This is done explicitly as we are + * shortcutting the som handling in the vacuous repeats as we know they + * all come from non-som patterns. 
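 * (current_report_offset is initialised to ~0ULL and only advances once
 * a report has been delivered, so it doubles as a marker that these logs
 * are still untouched.)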
*/ + fatbit_clear(scratch->deduper.som_log[0]); + fatbit_clear(scratch->deduper.som_log[1]); + scratch->deduper.som_log_dirty = 0; + } + + const size_t match_len = 0; + const char in_anchored = 0; + hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, stream_offset, + match_len, in_anchored); + if (rv == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + + return MO_CONTINUE_MATCHING; +} diff --git a/src/rose/rose.h b/src/rose/rose.h index 5dc57bc2e..a9058379e 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -131,4 +131,7 @@ void roseEodExec(const struct RoseEngine *t, u64a offset, hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, void *context); +int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, + u64a stream_offset, struct hs_scratch *scratch); + #endif // ROSE_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c2d9bcced..bb11b1a74 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1941,70 +1941,6 @@ struct DerivedBoundaryReports { set report_at_0_eod_full; }; -static -void reserveBoundaryReports(const BoundaryReports &boundary, - const DerivedBoundaryReports &dboundary, - RoseBoundaryReports *out, u32 *currOffset) { - u32 curr = *currOffset; - curr = ROUNDUP_N(curr, alignof(ReportID)); - memset(out, 0, sizeof(*out)); - - /* report lists are + 1 in size due to terminator */ - if (!boundary.report_at_eod.empty()) { - out->reportEodOffset = curr; - curr += sizeof(ReportID) * (boundary.report_at_eod.size() + 1); - } - if (!boundary.report_at_0.empty()) { - out->reportZeroOffset = curr; - curr += sizeof(ReportID) * (boundary.report_at_0.size() + 1); - } - if (!dboundary.report_at_0_eod_full.empty()) { - out->reportZeroEodOffset = curr; - curr += sizeof(ReportID) * (dboundary.report_at_0_eod_full.size() + 1); - } - - DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size()); - DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); - DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); - - *currOffset = curr; -} - -static -void fillInBoundaryReports(RoseEngine *engine, u32 offset, - const set &rl) { - if (rl.empty()) { - return; - } - - u32 *out = (u32 *)((char *)engine + offset); - assert(ISALIGNED(out)); - - for (ReportID r : rl) { - *out = r; - ++out; - } - - *out = MO_INVALID_IDX; -} - -static -void populateBoundaryReports(RoseEngine *engine, - const BoundaryReports &boundary, - const DerivedBoundaryReports &dboundary, - const RoseBoundaryReports &offsets) { - engine->boundary.reportEodOffset = offsets.reportEodOffset; - engine->boundary.reportZeroOffset = offsets.reportZeroOffset; - engine->boundary.reportZeroEodOffset = offsets.reportZeroEodOffset; - - fillInBoundaryReports(engine, offsets.reportEodOffset, - boundary.report_at_eod); - fillInBoundaryReports(engine, offsets.reportZeroOffset, - boundary.report_at_0); - fillInBoundaryReports(engine, offsets.reportZeroEodOffset, - dboundary.report_at_0_eod_full); -} - static void fillInReportInfo(RoseEngine *engine, u32 reportOffset, const ReportManager &rm, const vector &reports) { @@ -2917,6 +2853,43 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, return program; } +static +u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc, + const set &reports) { + if (reports.empty()) { + return 0; + } + + const bool has_som = false; + vector program; + for (const auto &id : reports) { + makeReport(build, bc, id, has_som, program); + } + return writeProgram(bc, 
flattenProgram({program})); +} + +static +RoseBoundaryReports +makeBoundaryPrograms(RoseBuildImpl &build, build_context &bc, + const BoundaryReports &boundary, + const DerivedBoundaryReports &dboundary) { + RoseBoundaryReports out; + memset(&out, 0, sizeof(out)); + + DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size()); + DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); + DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); + + out.reportEodOffset = + writeBoundaryProgram(build, bc, boundary.report_at_eod); + out.reportZeroOffset = + writeBoundaryProgram(build, bc, boundary.report_at_0); + out.reportZeroEodOffset = + writeBoundaryProgram(build, bc, dboundary.report_at_0_eod_full); + + return out; +} + static void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { const auto &g = build.g; @@ -3788,6 +3761,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.resources.has_anchored = true; } + auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary); + // Build NFAs set no_retrigger_queues; bool mpv_as_outfix; @@ -3928,9 +3903,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 activeLeftCount = leftInfoTable.size(); u32 rosePrefixCount = countRosePrefixes(leftInfoTable); - RoseBoundaryReports boundary_out; - reserveBoundaryReports(boundary, dboundary, &boundary_out, &currOffset); - u32 rev_nfa_table_offset; vector rev_nfa_offsets; prepSomRevNfas(ssm, &rev_nfa_table_offset, &rev_nfa_offsets, &currOffset); @@ -4098,7 +4070,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->asize = verify_u32(asize); engine->ematcherRegionSize = ematcher_region_size; engine->floatingStreamState = verify_u32(floatingStreamStateRequired); - populateBoundaryReports(engine.get(), boundary, dboundary, boundary_out); + + engine->boundary.reportEodOffset = boundary_out.reportEodOffset; + engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset; + engine->boundary.reportZeroEodOffset = boundary_out.reportZeroEodOffset; write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index e9edbc154..37ff9168a 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -235,14 +235,17 @@ struct RoseStateOffsets { }; struct RoseBoundaryReports { - u32 reportEodOffset; /**< 0 if no reports list, otherwise offset of - * MO_INVALID_IDX terminated list to report at EOD */ - u32 reportZeroOffset; /**< 0 if no reports list, otherwise offset of - * MO_INVALID_IDX terminated list to report at offset - * 0 */ - u32 reportZeroEodOffset; /**< 0 if no reports list, otherwise offset of - * MO_INVALID_IDX terminated list to report if eod - * is at offset 0. Superset of other lists. */ + /** \brief 0 if no reports list, otherwise offset of program to run to + * deliver reports at EOD. */ + u32 reportEodOffset; + + /** \brief 0 if no reports list, otherwise offset of program to run to + * deliver reports at offset 0. */ + u32 reportZeroOffset; + + /** \brief 0 if no reports list, otherwise offset of program to run to + * deliver reports if EOD is at offset 0. Superset of other programs. */ + u32 reportZeroEodOffset; }; /* NFA Queue Assignment diff --git a/src/runtime.c b/src/runtime.c index 5f3579186..8a0cb9f98 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -260,57 +260,6 @@ SomNfaCallback selectOutfixSomAdaptor(const struct RoseEngine *rose) { return is_simple ? 
outfixSimpleSomSomAdaptor : outfixSomSomAdaptor; } -/** - * \brief Fire callbacks for a boundary report list. - * - * Returns MO_HALT_MATCHING if the user has instructed us to halt, and - * MO_CONTINUE_MATCHING otherwise. - */ - -static never_inline -int processReportList(const struct RoseEngine *rose, u32 base_offset, - u64a stream_offset, hs_scratch_t *scratch) { - DEBUG_PRINTF("running report list at offset %u\n", base_offset); - - if (told_to_stop_matching(scratch)) { - DEBUG_PRINTF("matching has been terminated\n"); - return MO_HALT_MATCHING; - } - - if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) { - /* we cannot delay the initialization of the som deduper logs any longer - * as we are reporting matches. This is done explicitly as we are - * shortcutting the som handling in the vacuous repeats as we know they - * all come from non-som patterns. */ - - fatbit_clear(scratch->deduper.som_log[0]); - fatbit_clear(scratch->deduper.som_log[1]); - scratch->deduper.som_log_dirty = 0; - } - - const ReportID *report = getByOffset(rose, base_offset); - - /* never required to do som as vacuous reports are always external */ - - if (rose->simpleCallback) { - for (; *report != MO_INVALID_IDX; report++) { - int rv = roseSimpleAdaptor(stream_offset, *report, scratch); - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } else { - for (; *report != MO_INVALID_IDX; report++) { - int rv = roseAdaptor(stream_offset, *report, scratch); - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - - return MO_CONTINUE_MATCHING; -} - /** \brief Initialise SOM state. Used in both block and streaming mode. */ static really_inline void initSomState(const struct RoseEngine *rose, char *state) { @@ -488,15 +437,15 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, if (!length) { if (rose->boundary.reportZeroEodOffset) { - processReportList(rose, rose->boundary.reportZeroEodOffset, 0, - scratch); + roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0, + scratch); } goto set_retval; } if (rose->boundary.reportZeroOffset) { - int rv = processReportList(rose, rose->boundary.reportZeroOffset, 0, - scratch); + int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset, + 0, scratch); if (rv == MO_HALT_MATCHING) { goto set_retval; } @@ -559,8 +508,8 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, } if (rose->boundary.reportEodOffset) { - processReportList(rose, rose->boundary.reportEodOffset, length, - scratch); + roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length, + scratch); } set_retval: @@ -727,25 +676,28 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, getHistory(state, rose, id->offset), getHistoryAmount(rose, id->offset), id->offset, status, 0); + // Rose program execution (used for some report paths) depends on these + // values being initialised. 
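+    // (At EOD, all earlier matches have already been handled, so the
+    // minimum match offset can safely start at the stream offset itself.)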
+ scratch->tctxt.lastMatchOffset = 0; + scratch->tctxt.minMatchOffset = id->offset; + if (rose->somLocationCount) { loadSomFromStream(scratch, id->offset); } if (!id->offset) { if (rose->boundary.reportZeroEodOffset) { - int rv = processReportList(rose, rose->boundary.reportZeroEodOffset, - 0, scratch); + int rv = roseRunBoundaryProgram( + rose, rose->boundary.reportZeroEodOffset, 0, scratch); if (rv == MO_HALT_MATCHING) { - scratch->core_info.status |= STATUS_TERMINATED; return; } } } else { if (rose->boundary.reportEodOffset) { - int rv = processReportList(rose, rose->boundary.reportEodOffset, - id->offset, scratch); + int rv = roseRunBoundaryProgram( + rose, rose->boundary.reportEodOffset, id->offset, scratch); if (rv == MO_HALT_MATCHING) { - scratch->core_info.status |= STATUS_TERMINATED; return; } } @@ -978,9 +930,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, if (!id->offset && rose->boundary.reportZeroOffset) { DEBUG_PRINTF("zero reports\n"); - processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch); - if (unlikely(can_stop_matching(scratch))) { - DEBUG_PRINTF("stream is broken, halting scan\n"); + int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset, + 0, scratch); + if (rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("halting scan\n"); setStreamStatus(state, scratch->core_info.status); if (told_to_stop_matching(scratch)) { return HS_SCAN_TERMINATED; From e466e67b79429af50b0bab590b40b4ea45df847b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 4 Mar 2016 10:10:26 +1100 Subject: [PATCH 111/218] AlignedAllocator: simplify and fix for MSVC, C++11 - Simplify thanks to minimal allocator requirements in C++11. - Add required copy constructor. --- src/util/alloc.h | 61 +++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/src/util/alloc.h b/src/util/alloc.h index ab996a8d8..191bc387e 100644 --- a/src/util/alloc.h +++ b/src/util/alloc.h @@ -78,51 +78,42 @@ void aligned_free_internal(void *ptr); /** \brief Aligned allocator class for use with STL containers. Ensures that * your objects are aligned to N bytes. */ -template class AlignedAllocator { +template +class AlignedAllocator { public: - typedef T value_type; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - typedef T *pointer; - typedef const T *const_pointer; - typedef T &reference; - typedef const T &const_reference; - - template struct rebind { - typedef AlignedAllocator other; - }; - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - size_type max_size() const { - return std::numeric_limits::max() / sizeof(value_type); - } - - pointer allocate(size_type size) const { - return static_cast( - aligned_malloc_internal(size * sizeof(value_type), N)); - } + using value_type = T; - void deallocate(pointer x, size_type) const { aligned_free_internal(x); } + AlignedAllocator() noexcept {} - void construct(pointer x, const value_type &val) const { - new (x) value_type(val); - } + template + AlignedAllocator(const AlignedAllocator &) noexcept {} - void destroy(pointer p) const { p->~value_type(); } + template struct rebind { + using other = AlignedAllocator; + }; - bool operator==(const AlignedAllocator &) const { - // All instances of AlignedAllocator can dealloc each others' memory. 
- return true; + T *allocate(std::size_t size) const { + size_t alloc_size = size * sizeof(T); + return static_cast(aligned_malloc_internal(alloc_size, N)); } - bool operator!=(const AlignedAllocator &) const { - // All instances of AlignedAllocator can dealloc each others' memory. - return false; + void deallocate(T *x, std::size_t) const noexcept { + aligned_free_internal(x); } }; +template +bool operator==(const AlignedAllocator &, + const AlignedAllocator &) { + return true; +} + +template +bool operator!=(const AlignedAllocator &a, + const AlignedAllocator &b) { + return !(a == b); +} + } // namespace ue2 #endif From ff7e101757d21a67499b65cb807254b3f66a97d5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 4 Mar 2016 13:54:30 +1100 Subject: [PATCH 112/218] Rose: be more precise with CATCHUP instruction - Fix bugs introduced by recent addition of the boundary program. It's not safe to do catchup there. - Only do catchup once per report set, when necessary. --- src/rose/match.c | 6 +++++ src/rose/rose_build_bytecode.cpp | 45 +++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/src/rose/match.c b/src/rose/match.c index 9163cba3a..9ff7db06d 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -599,6 +599,12 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, scratch->deduper.som_log_dirty = 0; } + // Keep assertions in program report path happy. At offset zero, there can + // have been no earlier reports. At EOD, all earlier reports should have + // been handled and we will have been caught up to the stream offset by the + // time we are running boundary report programs. + scratch->tctxt.minMatchOffset = stream_offset; + const size_t match_len = 0; const char in_anchored = 0; hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, stream_offset, diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index bb11b1a74..51a0eeef1 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2486,6 +2486,30 @@ void makeDedupeSom(const ReportID id, vector &report_block) { report_block.push_back(move(ri)); } +static +void makeCatchup(RoseBuildImpl &build, build_context &bc, + const flat_set &reports, + vector &program) { + if (!bc.needs_catchup) { + return; + } + + // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run + // before reports are triggered. + + auto report_needs_catchup = [&](const ReportID &id) { + const Report &report = build.rm.getReport(id); + return report.type != INTERNAL_ROSE_CHAIN; + }; + + if (!any_of(begin(reports), end(reports), report_needs_catchup)) { + DEBUG_PRINTF("none of the given reports needs catchup\n"); + return; + } + + program.emplace_back(ROSE_INSTR_CATCH_UP); +} + static void makeReport(RoseBuildImpl &build, build_context &bc, const ReportID id, const bool has_som, vector &program) { @@ -2503,13 +2527,6 @@ void makeReport(RoseBuildImpl &build, build_context &bc, const ReportID id, report_block.push_back(move(ri)); } - // Catch up -- everything except the INTERNAL_ROSE_CHAIN report needs this. - // TODO: this could be floated in front of all the reports and only done - // once. - if (bc.needs_catchup && report.type != INTERNAL_ROSE_CHAIN) { - report_block.emplace_back(ROSE_INSTR_CATCH_UP); - } - // If this report has an exhaustion key, we can check it in the program // rather than waiting until we're in the callback adaptor. 
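     // (A report's ekey is its exhaustion key; INVALID_EKEY marks reports
     // that have none.)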
if (report.ekey != INVALID_EKEY) { @@ -2651,7 +2668,10 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, has_som = true; } - for (ReportID id : g[v].reports) { + const auto &reports = g[v].reports; + makeCatchup(build, bc, reports, program); + + for (ReportID id : reports) { makeReport(build, bc, id, has_som, program); } } @@ -2860,6 +2880,10 @@ u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc, return 0; } + // Note: no CATCHUP instruction is necessary in the boundary case, as we + // should always be caught up (and may not even have the resources in + // scratch to support it). + const bool has_som = false; vector program; for (const auto &id : reports) { @@ -3565,8 +3589,11 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, makeRoleCheckNotHandled(bc, v, program); } + const auto &reports = g[v].reports; + makeCatchup(build, bc, reports, program); + const bool has_som = false; - for (const auto &id : g[v].reports) { + for (const auto &id : reports) { makeReport(build, bc, id, has_som, program); } From 9c9eb2ca9a3bebeb35ad85c2faaaf740375662cf Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 4 Mar 2016 14:46:42 +1100 Subject: [PATCH 113/218] makeReport: remove unused param --- src/rose/rose_build_bytecode.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 51a0eeef1..bcf42eed7 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2511,7 +2511,7 @@ void makeCatchup(RoseBuildImpl &build, build_context &bc, } static -void makeReport(RoseBuildImpl &build, build_context &bc, const ReportID id, +void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, vector &program) { assert(id < build.rm.numReports()); const Report &report = build.rm.getReport(id); @@ -2672,7 +2672,7 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, makeCatchup(build, bc, reports, program); for (ReportID id : reports) { - makeReport(build, bc, id, has_som, program); + makeReport(build, id, has_som, program); } } @@ -2887,7 +2887,7 @@ u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc, const bool has_som = false; vector program; for (const auto &id : reports) { - makeReport(build, bc, id, has_som, program); + makeReport(build, id, has_som, program); } return writeProgram(bc, flattenProgram({program})); } @@ -3594,7 +3594,7 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, const bool has_som = false; for (const auto &id : reports) { - makeReport(build, bc, id, has_som, program); + makeReport(build, id, has_som, program); } return program; From 4a482b07ea65ef9b64c1351af6b853210f1637f0 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 7 Mar 2016 14:48:01 +1100 Subject: [PATCH 114/218] UE-2919: add termination checks around handleReportInternally --- src/rose/catchup.c | 8 ++++++-- src/rose/catchup.h | 2 ++ src/rose/match.c | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 40327350c..11178675d 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -488,7 +488,8 @@ int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { offset, id); if (handleReportInternally(t, scratch, id, offset)) { - return MO_CONTINUE_MATCHING; + return can_stop_matching(scratch) ? 
MO_HALT_MATCHING + : MO_CONTINUE_MATCHING; } updateLastMatchOffset(tctxt, offset); @@ -626,7 +627,8 @@ int roseNfaAdaptor(u64a offset, ReportID id, void *context) { const struct RoseEngine *t = scratch->core_info.rose; if (handleReportInternally(t, scratch, id, offset)) { - return MO_CONTINUE_MATCHING; + return can_stop_matching(scratch) ? MO_HALT_MATCHING + : MO_CONTINUE_MATCHING; } return tctxt->cb(offset, id, scratch); @@ -905,6 +907,7 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, hwlmcb_rv_t rv = roseCatchUpMPV(t, report_ok_loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { + DEBUG_PRINTF("terminating...\n"); return rv; } @@ -1060,6 +1063,7 @@ hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch) { rv = roseCatchUpMPV(t, loc, scratch); assert(rv != HWLM_CONTINUE_MATCHING || scratch->catchup_pq.qm_size <= t->outfixEndQueue); + assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING); return rv; } diff --git a/src/rose/catchup.h b/src/rose/catchup.h index 910aa8da0..692b184bd 100644 --- a/src/rose/catchup.h +++ b/src/rose/catchup.h @@ -105,6 +105,7 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc, struct hs_scratch *scratch) { u64a cur_offset = loc + scratch->core_info.buf_offset; assert(cur_offset >= scratch->tctxt.minMatchOffset); + assert(!can_stop_matching(scratch)); if (canSkipCatchUpMPV(t, scratch, cur_offset)) { updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset); @@ -151,6 +152,7 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, || scratch->tctxt.minMatchOffset == end); assert(rv != HWLM_CONTINUE_MATCHING || scratch->tctxt.minNonMpvMatchOffset == end); + assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING); return rv; } diff --git a/src/rose/match.c b/src/rose/match.c index 9ff7db06d..96f090281 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -184,6 +184,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, * be known to be consistent */ if (ensureMpvQueueFlushed(t, scratch, qi, loc, in_catchup) == HWLM_TERMINATE_MATCHING) { + DEBUG_PRINTF("terminating...\n"); return HWLM_TERMINATE_MATCHING; } } From e10d2eb2697015fd6bdf9200ffe71661ef26feb8 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 8 Mar 2016 14:53:03 +1100 Subject: [PATCH 115/218] PrintTo functions for google test and valgrind --- unit/internal/multiaccel_matcher.cpp | 5 +++++ unit/internal/nfagraph_repeat.cpp | 6 ++++++ unit/internal/nfagraph_width.cpp | 11 +++++++++++ 3 files changed, 22 insertions(+) diff --git a/unit/internal/multiaccel_matcher.cpp b/unit/internal/multiaccel_matcher.cpp index 1e689430a..34e5c5ed8 100644 --- a/unit/internal/multiaccel_matcher.cpp +++ b/unit/internal/multiaccel_matcher.cpp @@ -287,3 +287,8 @@ static const MultiaccelTestParam multiaccelTests[] = { }; INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests)); + +// boring stuff for google test +void PrintTo(const MultiaccelTestParam &p, ::std::ostream *os) { + *os << "MultiaccelTestParam: " << p.match_pattern; +} diff --git a/unit/internal/nfagraph_repeat.cpp b/unit/internal/nfagraph_repeat.cpp index 53506b83b..2473d7556 100644 --- a/unit/internal/nfagraph_repeat.cpp +++ b/unit/internal/nfagraph_repeat.cpp @@ -103,3 +103,9 @@ TEST_P(NFAPureRepeatTest, Check) { ASSERT_EQ(t.minBound, repeat.bounds.min); ASSERT_EQ(t.maxBound, repeat.bounds.max); } + +// for google test +void PrintTo(const PureRepeatTest &p, ::std::ostream *os) { + *os << "PureRepeatTest: " << p.pattern + << "{" << p.minBound 
<< ',' << p.maxBound << '}'; +} diff --git a/unit/internal/nfagraph_width.cpp b/unit/internal/nfagraph_width.cpp index 81c49532e..03508ea84 100644 --- a/unit/internal/nfagraph_width.cpp +++ b/unit/internal/nfagraph_width.cpp @@ -84,3 +84,14 @@ TEST_P(NFAWidthTest, Check) { ASSERT_EQ(t.minWidth, findMinWidth(*w)); ASSERT_EQ(t.maxWidth, findMaxWidth(*w)); } + +// for google test +void PrintTo(const WidthTest &w, ::std::ostream *os) { + *os << "WidthTest: " << w.pattern << "{" << w.minWidth << ','; + if (w.maxWidth == depth::infinity()) { + *os << "inf"; + } else { + *os << w.maxWidth; + } + *os << '}'; +} From d0aa138adab0592e1a9ad9dc4e5d5840ff346495 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 14 Mar 2016 09:28:17 +1100 Subject: [PATCH 116/218] castle/repeat: fix a number of bugs - Add fits_in_len_bytes assertions for packed stores. Corrects the assertion formerly on line 888. - In exclusive mode, don't overwrite packedCtrlSize with the max of the group; each repeat should know how many bytes it is using, even if they share the same stream state. - Ensure that exclusive mode stream state is sized correctly. --- src/nfa/castlecompile.cpp | 21 ++++++++------------- src/nfa/repeat.c | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 5e8b662af..d7312b855 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -361,25 +361,22 @@ void buildSubcastles(const CastleProto &proto, vector &subs, DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i, repeatTypeName(rtype), pr.bounds.str().c_str()); - u32 subScratchStateSize; - u32 subStreamStateSize; - SubCastle &sub = subs[i]; RepeatInfo &info = infos[i]; - // handle exclusive case differently + info.packedCtrlSize = rsi.packedCtrlSize; + u32 subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); + + // Handle stream/scratch space alloc for exclusive case differently. if (contains(groupId, i)) { u32 id = groupId.at(i); - maxStreamSize[id] = MAX(maxStreamSize[id], rsi.packedCtrlSize); + maxStreamSize[id] = max(maxStreamSize[id], subStreamStateSize); + // SubCastle full/stream state offsets are written in for the group + // below. } else { - subScratchStateSize = verify_u32(sizeof(RepeatControl)); - subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); - - info.packedCtrlSize = rsi.packedCtrlSize; sub.fullStateOffset = scratchStateSize; sub.streamStateOffset = streamStateSize; - - scratchStateSize += subScratchStateSize; + scratchStateSize += verify_u32(sizeof(RepeatControl)); streamStateSize += subStreamStateSize; } @@ -420,8 +417,6 @@ void buildSubcastles(const CastleProto &proto, vector &subs, u32 top = j.first; u32 id = j.second; SubCastle &sub = subs[top]; - RepeatInfo &info = infos[top]; - info.packedCtrlSize = maxStreamSize[id]; if (!scratchOffset[id]) { sub.fullStateOffset = scratchStateSize; sub.streamStateOffset = streamStateSize; diff --git a/src/nfa/repeat.c b/src/nfa/repeat.c index d12bc5a1d..339829a52 100644 --- a/src/nfa/repeat.c +++ b/src/nfa/repeat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -882,15 +882,25 @@ enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info, return REPEAT_NOMATCH; } +/** \brief True if the given value can be packed into len bytes. 
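+ * For example, fits_in_len_bytes(0xffff, 2) holds, since 0xffff packs
+ * into two bytes, while fits_in_len_bytes(0x20000, 2) does not.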
*/ +static really_inline +int fits_in_len_bytes(u64a val, u32 len) { + if (len >= 8) { + return 1; + } + return val <= (1ULL << (len * 8)); +} + static really_inline void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) { assert(val <= offset); - assert(max < (1ULL << (8 * len))); + assert(fits_in_len_bytes(max, len)); u64a delta = offset - val; if (delta >= max) { delta = max; } DEBUG_PRINTF("delta %llu\n", delta); + assert(fits_in_len_bytes(delta, len)); partial_store_u64a(dest, delta, len); } @@ -967,6 +977,7 @@ void repeatPackBitmap(char *dest, const struct RepeatInfo *info, DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize); // Write out packed bitmap. + assert(fits_in_len_bytes(bitmap, info->packedCtrlSize)); partial_store_u64a(dest, bitmap, info->packedCtrlSize); } @@ -1440,6 +1451,7 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info, DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n", xs->first, xs->last, patch); DEBUG_PRINTF("value:%llu\n", val); + assert(fits_in_len_bytes(val, encoding_size)); partial_store_u64a(ring + encoding_size * idx, val, encoding_size); mmbit_set(active, patch_count, idx); } From 2cd28a5fa4ac6534d92bc5efa8455c2763faf8d7 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 15 Mar 2016 16:31:13 +1100 Subject: [PATCH 117/218] scratch: Remove (now unused) tctxtToScratch --- src/scratch.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/scratch.h b/src/scratch.h index 99cdb3700..dc99106f9 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -180,12 +180,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 som_store_count; }; -static really_inline -struct hs_scratch *tctxtToScratch(struct RoseContext *tctxt) { - return (struct hs_scratch *) - ((char *)tctxt - offsetof(struct hs_scratch, tctxt)); -} - /* array of fatbit ptr; TODO: why not an array of fatbits? 
*/ static really_inline struct fatbit **getAnchoredLiteralLog(struct hs_scratch *scratch) { From 9ae908fd119467320899f264f6fdc2fdc5f642b7 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 15 Mar 2016 16:36:39 +1100 Subject: [PATCH 118/218] shufticompile: Remove unused mergeShuftiMask --- src/nfa/shufticompile.cpp | 11 +---------- src/nfa/shufticompile.h | 4 +--- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/src/nfa/shufticompile.cpp b/src/nfa/shufticompile.cpp index 13ec9d0c4..05072a44d 100644 --- a/src/nfa/shufticompile.cpp +++ b/src/nfa/shufticompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -163,15 +163,6 @@ void shuftiBuildDoubleMasks(const CharReach &onechar, memcpy(hi2, hi2_a.data(), sizeof(m128)); } -void mergeShuftiMask(m128 *lo, const m128 lo_in, u32 lo_bits) { - assert(lo_bits <= 8); - const u8 *lo_in_p = (const u8 *)&lo_in; - u8 *lo_p = (u8 *)lo; - for (u32 i = 0; i < 16; i++) { - lo_p[i] |= lo_in_p[i] << lo_bits; - } -} - #ifdef DUMP_SUPPORT CharReach shufti2cr(const m128 lo_in, const m128 hi_in) { diff --git a/src/nfa/shufticompile.h b/src/nfa/shufticompile.h index b0cc1b1b4..2795b73a4 100644 --- a/src/nfa/shufticompile.h +++ b/src/nfa/shufticompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -54,8 +54,6 @@ void shuftiBuildDoubleMasks(const CharReach &onechar, const flat_set> &twochar, m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2); -void mergeShuftiMask(m128 *lo, const m128 lo_in, u32 lo_bits); - #ifdef DUMP_SUPPORT /** From 5dd4aa9c13957aa996c52c7bdeef10e42d95b2c0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 5 Nov 2015 16:32:10 +1100 Subject: [PATCH 119/218] ng_find_matches: Simplify and improve performance Improve performance by using bitsets rather than sets of vertex indices. 
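As a sketch of the change (the struct below is the one added by this
patch), the active-state map becomes a bitset indexed by vertex index,
with a parallel array carrying each live state's SOM offset:

    struct StateSet {
        explicit StateSet(size_t sz) : s(sz), som(sz, 0) {}
        boost::dynamic_bitset<> s; // one bit per vertex index: state on
        vector<size_t> som;        // leftmost SOM offset per on state
    };

Stepping the NFA then walks the set bits with find_first()/find_next()
instead of iterating a map<NFAVertex, size_t>.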
--- util/ng_find_matches.cpp | 187 +++++++++++++++++++++------------------ 1 file changed, 103 insertions(+), 84 deletions(-) diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp index d2456c8fc..4d188d787 100644 --- a/util/ng_find_matches.cpp +++ b/util/ng_find_matches.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,34 +47,59 @@ using namespace std; using namespace ue2; -// convenience typedefs -typedef map SOMMap; -typedef set > MatchSet; +namespace { + +struct StateSet { + explicit StateSet(size_t sz) : s(sz), som(sz, 0) {} + boost::dynamic_bitset<> s; // bitset of states that are on + vector som; // som value for each state +}; + +using MatchSet = set>; struct fmstate { - SOMMap states; - SOMMap next; - size_t offset; - unsigned char cur; - unsigned char prev; + const size_t num_states; // number of vertices in graph + StateSet states; // currently active states + StateSet next; // states on after this iteration + vector vertices; // mapping from index to vertex + size_t offset = 0; + unsigned char cur = 0; + unsigned char prev = 0; const bool som; const bool utf8; const bool allowStartDs; const ReportManager &rm; - fmstate(const bool som_in, const bool utf8_in, const bool aSD_in, + boost::dynamic_bitset<> accept; // states leading to accept + boost::dynamic_bitset<> accept_with_eod; // states leading to accept or eod + + fmstate(const NGHolder &g, bool som_in, bool utf8_in, bool aSD_in, const ReportManager &rm_in) - : offset(0), cur(0), prev(0), som(som_in), utf8(utf8_in), - allowStartDs(aSD_in), rm(rm_in) {} + : num_states(num_vertices(g)), states(num_states), next(num_states), + vertices(num_vertices(g), NFAGraph::null_vertex()), som(som_in), + utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in), accept(num_states), + accept_with_eod(num_states) { + // init states + states.s.set(g[g.start].index); + if (allowStartDs) { + states.s.set(g[g.startDs].index); + } + // fill vertex mapping + for (const auto &v : vertices_range(g)) { + vertices[g[v].index] = v; + } + // init accept states + for (const auto &u : inv_adjacent_vertices_range(g.accept, g)) { + accept.set(g[u].index); + } + accept_with_eod = accept; + for (const auto &u : inv_adjacent_vertices_range(g.acceptEod, g)) { + accept_with_eod.set(g[u].index); + } + } }; -static -void initStates(const NGHolder &g, struct fmstate &state) { - state.states.insert(make_pair(g.start, 0)); - if (state.allowStartDs) { - state.states.insert(make_pair(g.startDs, 0)); - } -} +} // namespace static bool isWordChar(const unsigned char c) { @@ -115,17 +140,9 @@ bool isUtf8CodePoint(const char c) { } static -bool canReach(const NGHolder &g, const NFAVertex &src, const NFAVertex &dst, +bool canReach(const NGHolder &g, const NFAEdge &e, struct fmstate &state) { - // find relevant edge and see whether it has asserts - NFAEdge e; - bool exists; - u32 flags; - - tie(e, exists) = edge(src, dst, g); - assert(exists); - - flags = g[e].assert_flags; + auto flags = g[e].assert_flags; if (!flags) { return true; } @@ -160,33 +177,35 @@ bool canReach(const NGHolder &g, const NFAVertex &src, const NFAVertex &dst, static void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state, bool allowEodMatches) { - SOMMap::const_iterator it, ite; + auto acc_states = state.states.s; + acc_states &= allowEodMatches ? 
state.accept_with_eod : state.accept; - for (it = state.states.begin(), ite = state.states.end(); it != ite; ++it) { - NFAGraph::adjacency_iterator ai, ae; + for (size_t i = acc_states.find_first(); i != acc_states.npos; + i = acc_states.find_next(i)) { + const NFAVertex u = state.vertices[i]; + const size_t &som_offset = state.states.som[i]; - // we can't accept anything from startDs inbetween UTF-8 codepoints - if (state.utf8 && it->first == g.startDs && !isUtf8CodePoint(state.cur)) { + // we can't accept anything from startDs in between UTF-8 codepoints + if (state.utf8 && u == g.startDs && !isUtf8CodePoint(state.cur)) { continue; } - for (tie(ai, ae) = adjacent_vertices(it->first, g); ai != ae; ++ai) { - if (*ai == g.accept || (*ai == g.acceptEod && allowEodMatches)) { + for (const auto &e : out_edges_range(u, g)) { + NFAVertex v = target(e, g); + if (v == g.accept || (v == g.acceptEod && allowEodMatches)) { // check edge assertions if we are allowed to reach accept - if (!canReach(g, it->first, *ai, state)) { + if (!canReach(g, e, state)) { continue; } DEBUG_PRINTF("match found at %zu\n", state.offset); - assert(!g[it->first].reports.empty()); - for (const auto &report_id : - g[it->first].reports) { + assert(!g[u].reports.empty()); + for (const auto &report_id : g[u].reports) { const Report &ri = state.rm.getReport(report_id); DEBUG_PRINTF("report %u has offset adjustment %d\n", report_id, ri.offsetAdjust); - matches.insert( - make_pair(it->second, state.offset + ri.offsetAdjust)); + matches.emplace(som_offset, state.offset + ri.offsetAdjust); } } } @@ -195,55 +214,57 @@ void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state, static void step(const NGHolder &g, struct fmstate &state) { - state.next.clear(); - SOMMap::iterator it, ite; + state.next.s.reset(); - for (it = state.states.begin(), ite = state.states.end(); it != ite; ++it) { - NFAGraph::adjacency_iterator ai, ae; + for (size_t i = state.states.s.find_first(); i != state.states.s.npos; + i = state.states.s.find_next(i)) { + const NFAVertex &u = state.vertices[i]; + const size_t &u_som_offset = state.states.som[i]; - for (tie(ai, ae) = adjacent_vertices(it->first, g); ai != ae; ++ai) { - if (*ai == g.acceptEod) { + for (const auto &e : out_edges_range(u, g)) { + NFAVertex v = target(e, g); + if (v == g.acceptEod) { // can't know the future: we don't know if we're at EOD. 
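                // (acceptEod matches are collected by the final getMatches
                // call with allowEodMatches set, once all input is consumed.)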
continue; } - if (*ai == g.accept) { + if (v == g.accept) { continue; } - if (!state.allowStartDs && *ai == g.startDs) { + if (!state.allowStartDs && v == g.startDs) { continue; } - const CharReach &cr = g[*ai].char_reach; - // check reachability and edge assertions - if (cr.test(state.cur) && canReach(g, it->first, *ai, state)) { - SOMMap::const_iterator ni; - size_t next_som; + const CharReach &cr = g[v].char_reach; + const size_t v_idx = g[v].index; + // check reachability and edge assertions + if (cr.test(state.cur) && canReach(g, e, state)) { // if we aren't in SOM mode, just set every SOM to 0 if (!state.som) { - state.next[*ai] = 0; + state.next.s.set(v_idx); + state.next.som[v_idx] = 0; continue; } // if this is first vertex since start, use current offset as SOM - if (it->first == g.start || it->first == g.startDs || - is_virtual_start(it->first, g)) { + size_t next_som; + if (u == g.start || u == g.startDs || is_virtual_start(u, g)) { next_som = state.offset; } else { // else, inherit SOM from predecessor - next_som = it->second; + next_som = u_som_offset; } // check if the vertex is already active - ni = state.next.find(*ai); - // if this vertex is not yet active, use current SOM - if (ni == state.next.end()) { - state.next[*ai] = next_som; + if (!state.next.s.test(v_idx)) { + state.next.s.set(v_idx); + state.next.som[v_idx] = next_som; } else { // else, work out leftmost SOM - state.next[*ai] = min(next_som, ni->second); + state.next.som[v_idx] = + min(next_som, state.next.som[v_idx]); } } } @@ -251,34 +272,32 @@ void step(const NGHolder &g, struct fmstate &state) { } // filter extraneous matches -static void filterMatches(MatchSet &matches) { +static +void filterMatches(MatchSet &matches) { set eom; - MatchSet::iterator msit; // first, collect all end-offset matches - for (msit = matches.begin(); msit != matches.end(); ++msit) { - eom.insert(msit->second); + for (const auto &match : matches) { + eom.insert(match.second); } // now, go through all the end-offsets and filter extra matches - set::const_iterator eomit; - for (eomit = eom.begin(); eomit != eom.end(); ++eomit) { - + for (const auto &elem : eom) { // find minimum SOM for this EOM size_t min_som = -1U; - for (msit = matches.begin(); msit != matches.end(); ++msit) { + for (const auto &match : matches) { // skip entries with wrong EOM - if (msit->second != *eomit) { + if (match.second != elem) { continue; } - min_som = min(min_som, msit->first); + min_som = min(min_som, match.first); } - msit = matches.begin(); + auto msit = matches.begin(); while (msit != matches.end()) { // skip everything that doesn't match - if (msit->second != *eomit || msit->first <= min_som) { + if (msit->second != elem || msit->first <= min_som) { ++msit; continue; } @@ -295,14 +314,13 @@ static void filterMatches(MatchSet &matches) { void findMatches(const NGHolder &g, const ReportManager &rm, const string &input, MatchSet &matches, const bool notEod, const bool som, const bool utf8) { - const bool allowStartDs = (proper_out_degree(g.startDs, g) > 0); + assert(hasCorrectlyNumberedVertices(g)); - struct fmstate state(som, utf8, allowStartDs, rm); + const bool allowStartDs = (proper_out_degree(g.startDs, g) > 0); - initStates(g, state); + struct fmstate state(g, som, utf8, allowStartDs, rm); - string::const_iterator it, ite; - for (it = input.begin(), ite = input.end(); it != ite; ++it) { + for (auto it = input.begin(), ite = input.end(); it != ite; ++it) { state.offset = distance(input.begin(), it); state.cur = *it; @@ -310,14 +328,15 @@ void 
findMatches(const NGHolder &g, const ReportManager &rm, getMatches(g, matches, state, false); - DEBUG_PRINTF("index %zu, %zu states on\n", state.offset, state.next.size()); - if (state.next.empty()) { + DEBUG_PRINTF("index %zu, %zu states on\n", state.offset, + state.next.s.count()); + if (state.next.s.empty()) { if (state.som) { filterMatches(matches); } return; } - state.states.swap(state.next); + state.states = state.next; state.prev = state.cur; } state.offset = input.size(); From 839c90594c9d92f8170952337ad2c3922b80b909 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 23 Mar 2016 11:47:54 +1100 Subject: [PATCH 120/218] mcclellan: Remove the use of state numbering McClellan and Haig only need to know which states are not participants (i.e. unused start, startDs) --- src/nfagraph/ng_haig.cpp | 68 ++++++++++++------------- src/nfagraph/ng_haig.h | 8 +-- src/nfagraph/ng_mcclellan.cpp | 74 ++++++++++++---------------- src/nfagraph/ng_mcclellan_internal.h | 7 ++- src/nfagraph/ng_restructuring.cpp | 11 +++++ src/nfagraph/ng_restructuring.h | 7 +++ 6 files changed, 87 insertions(+), 88 deletions(-) diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index d06083bd2..4409924b6 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -69,17 +69,15 @@ struct haig_too_wide { template static -void populateInit(const NGHolder &g, - const ue2::unordered_map &state_ids, +void populateInit(const NGHolder &g, const flat_set &unused, stateset *init, stateset *initDS, vector *v_by_index) { DEBUG_PRINTF("graph kind: %u\n", (int)g.kind); for (auto v : vertices_range(g)) { - u32 v_index = g[v].index; - if (state_ids.at(v) == NO_STATE) { + if (contains(unused, v)) { continue; } - + u32 v_index = g[v].index; if (is_any_start(v, g)) { init->set(v_index); if (hasSelfLoop(v, g) || is_triggered(g)) { @@ -116,8 +114,8 @@ void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) { class Automaton_Base { protected: Automaton_Base(const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in) - : graph(graph_in), state_ids(state_ids_in) { + const flat_set &unused_in) + : graph(graph_in), unused(unused_in) { calculateAlphabet(graph, alpha, unalpha, &alphasize); assert(alphasize <= ALPHABET_SIZE); } @@ -126,7 +124,7 @@ class Automaton_Base { static bool canPrune(const flat_set &) { return false; } const NGHolder &graph; - const ue2::unordered_map &state_ids; + const flat_set &unused; array alpha; array unalpha; @@ -145,14 +143,13 @@ class Automaton_Big : public Automaton_Base { typedef map StateMap; Automaton_Big(const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in, - som_type som, const vector> &triggers, - bool unordered_som) - : Automaton_Base(graph_in, state_ids_in), numStates(num_vertices(graph)), + const flat_set &unused_in, som_type som, + const vector> &triggers, bool unordered_som) + : Automaton_Base(graph_in, unused_in), numStates(num_vertices(graph)), init(numStates), initDS(numStates), squash(numStates), accept(numStates), acceptEod(numStates), toppable(numStates), dead(numStates) { - populateInit(graph, state_ids, &init, &initDS, &v_by_index); + populateInit(graph, unused, &init, &initDS, &v_by_index); populateAccepts(graph, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; @@ -175,7 +172,7 @@ class Automaton_Big : public Automaton_Base { cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { - markToppableStarts(graph, state_ids, false, triggers, &toppable); + markToppableStarts(graph, unused, false, 
triggers, &toppable); } } @@ -244,11 +241,11 @@ class Automaton_Graph : public Automaton_Base { typedef ue2::unordered_map StateMap; Automaton_Graph(const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in, + const flat_set &unused_in, som_type som, const vector> &triggers, bool unordered_som) - : Automaton_Base(graph_in, state_ids_in) { - populateInit(graph, state_ids, &init, &initDS, &v_by_index); + : Automaton_Base(graph_in, unused_in) { + populateInit(graph, unused, &init, &initDS, &v_by_index); populateAccepts(graph, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; @@ -272,7 +269,7 @@ class Automaton_Graph : public Automaton_Base { cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { dynamic_bitset<> temp(NFA_STATE_LIMIT); - markToppableStarts(graph, state_ids, false, triggers, &temp); + markToppableStarts(graph, unused, false, triggers, &temp); toppable = bitfield(temp); } } @@ -481,11 +478,10 @@ bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) { } static -s32 getSlotID(const NGHolder &g, - UNUSED const ue2::unordered_map &state_ids, +s32 getSlotID(const NGHolder &g, UNUSED const flat_set &unused, NFAVertex v) { if (is_triggered(g) && v == g.start) { - assert(state_ids.at(v) != NO_STATE); + assert(!contains(unused, v)); } else if (is_any_start_inc_virtual(v, g)) { return CREATE_NEW_SOM; } @@ -516,8 +512,7 @@ void haig_do_preds(const NGHolder &g, const stateset &nfa_states, template static -void haig_do_report(const NGHolder &g, - const ue2::unordered_map &state_ids, +void haig_do_report(const NGHolder &g, const flat_set &unused, NFAVertex accept_v, const stateset &source_nfa_states, const vector &state_mapping, set &out) { @@ -528,7 +523,7 @@ void haig_do_report(const NGHolder &g, continue; } for (ReportID report_id : g[v].reports) { - out.insert(som_report(report_id, getSlotID(g, state_ids, v))); + out.insert(som_report(report_id, getSlotID(g, unused, v))); } } } @@ -565,14 +560,14 @@ void haig_note_starts(const NGHolder &g, map *out) { template static bool doHaig(const NGHolder &g, - const ue2::unordered_map &state_ids, + const flat_set &unused, som_type som, const vector> &triggers, bool unordered_som, raw_som_dfa *rdfa) { u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from a fight */ typedef typename Auto::StateSet StateSet; vector nfa_state_map; - Auto n(g, state_ids, som, triggers, unordered_som); + Auto n(g, unused, som, triggers, unordered_som); try { if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) { DEBUG_PRINTF("state limit exceeded\n"); @@ -602,9 +597,9 @@ bool doHaig(const NGHolder &g, haig_do_preds(g, source_states, n.v_by_index, rdfa->state_som.back().preds); - haig_do_report(g, state_ids, g.accept, source_states, n.v_by_index, + haig_do_report(g, unused, g.accept, source_states, n.v_by_index, rdfa->state_som.back().reports); - haig_do_report(g, state_ids, g.acceptEod, source_states, n.v_by_index, + haig_do_report(g, unused, g.acceptEod, source_states, n.v_by_index, rdfa->state_som.back().reports_eod); } @@ -613,10 +608,10 @@ bool doHaig(const NGHolder &g, return true; } -unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, - u32 somPrecision, - const vector > &triggers, - const Grey &grey, bool unordered_som) { +unique_ptr +attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, + const vector> &triggers, const Grey &grey, + bool unordered_som) { assert(is_triggered(g) != triggers.empty()); assert(!unordered_som || is_triggered(g)); @@ -625,13 +620,12 @@ 
unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, return nullptr; } - auto state_ids = numberStates(g); - dropUnusedStarts(g, state_ids); - DEBUG_PRINTF("attempting to build haig \n"); assert(allMatchStatesHaveReports(g)); assert(hasCorrectlyNumberedVertices(g)); + auto unused = findUnusedStates(g); + u32 numStates = num_vertices(g); if (numStates > HAIG_MAX_NFA_STATE) { DEBUG_PRINTF("giving up... looks too big\n"); @@ -645,11 +639,11 @@ unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, bool rv; if (numStates <= NFA_STATE_LIMIT) { /* fast path */ - rv = doHaig(g, state_ids, som, triggers, unordered_som, + rv = doHaig(g, unused, som, triggers, unordered_som, rdfa.get()); } else { /* not the fast path */ - rv = doHaig(g, state_ids, som, triggers, unordered_som, + rv = doHaig(g, unused, som, triggers, unordered_som, rdfa.get()); } diff --git a/src/nfagraph/ng_haig.h b/src/nfagraph/ng_haig.h index 1df5e2f0b..baff2f586 100644 --- a/src/nfagraph/ng_haig.h +++ b/src/nfagraph/ng_haig.h @@ -54,10 +54,10 @@ struct raw_som_dfa; * between) */ -std::unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, - u32 somPrecision, - const std::vector > &triggers, - const Grey &grey, bool unordered_som_triggers = false); +std::unique_ptr +attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, + const std::vector> &triggers, + const Grey &grey, bool unordered_som_triggers = false); std::unique_ptr attemptToMergeHaig(const std::vector &dfas, diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 245dbefe5..897162872 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -152,12 +152,11 @@ void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state, template static -void populateInit(const NGHolder &g, - const ue2::unordered_map &state_ids, +void populateInit(const NGHolder &g, const flat_set &unused, stateset *init, stateset *init_deep, vector *v_by_index) { for (auto v : vertices_range(g)) { - if (state_ids.at(v) == NO_STATE) { + if (contains(unused, v)) { continue; } @@ -188,21 +187,22 @@ void populateInit(const NGHolder &g, } template -void populateAccepts(const NGHolder &g, - const ue2::unordered_map &state_ids, +void populateAccepts(const NGHolder &g, const flat_set &unused, StateSet *accept, StateSet *acceptEod) { for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (state_ids.at(v) != NO_STATE) { - accept->set(g[v].index); + if (contains(unused, v)) { + continue; } + accept->set(g[v].index); } for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { if (v == g.accept) { continue; } - if (state_ids.at(v) != NO_STATE) { - acceptEod->set(g[v].index); + if (contains(unused, v)) { + continue; } + acceptEod->set(g[v].index); } } @@ -315,8 +315,7 @@ bool triggerAllowed(const NGHolder &g, const NFAVertex v, return true; } -void markToppableStarts(const NGHolder &g, - const ue2::unordered_map &state_ids, +void markToppableStarts(const NGHolder &g, const flat_set &unused, bool single_trigger, const vector> &triggers, dynamic_bitset<> *out) { @@ -325,14 +324,13 @@ void markToppableStarts(const NGHolder &g, } for (auto v : vertices_range(g)) { - if (state_ids.at(v) == NO_STATE) { + if (contains(unused, v)) { continue; } - u32 vert_id = g[v].index; for (const auto &trigger : triggers) { if (triggerAllowed(g, v, triggers, trigger)) { - DEBUG_PRINTF("idx %u is valid location for top\n", vert_id); - out->set(vert_id); + DEBUG_PRINTF("idx %u is valid location for top\n", g[v].index); + out->set(g[v].index); break; } } 
@@ -349,15 +347,14 @@ class Automaton_Big { typedef map StateMap; Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in, - bool single_trigger, + const flat_set &unused_in, bool single_trigger, const vector> &triggers, bool prunable_in) - : rm(rm_in), graph(graph_in), state_ids(state_ids_in), - numStates(num_vertices(graph)), init(numStates), initDS(numStates), + : rm(rm_in), graph(graph_in), numStates(num_vertices(graph)), + unused(unused_in), init(numStates), initDS(numStates), squash(numStates), accept(numStates), acceptEod(numStates), toppable(numStates), prunable(prunable_in), dead(numStates) { - populateInit(graph, state_ids, &init, &initDS, &v_by_index); - populateAccepts(graph, state_ids, &accept, &acceptEod); + populateInit(graph, unused, &init, &initDS, &v_by_index); + populateAccepts(graph, unused, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; if (initDS == init) { @@ -379,7 +376,7 @@ class Automaton_Big { cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { - markToppableStarts(graph, state_ids, single_trigger, triggers, + markToppableStarts(graph, unused, single_trigger, triggers, &toppable); } } @@ -438,8 +435,8 @@ class Automaton_Big { const ReportManager *rm; public: const NGHolder &graph; - const ue2::unordered_map &state_ids; u32 numStates; + const flat_set &unused; vector v_by_index; vector cr_by_index; /* pre alpha'ed */ StateSet init; @@ -466,13 +463,11 @@ class Automaton_Graph { typedef ue2::unordered_map StateMap; Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in, - bool single_trigger, + const flat_set &unused_in, bool single_trigger, const vector> &triggers, bool prunable_in) - : rm(rm_in), graph(graph_in), state_ids(state_ids_in), - prunable(prunable_in) { - populateInit(graph, state_ids, &init, &initDS, &v_by_index); - populateAccepts(graph, state_ids, &accept, &acceptEod); + : rm(rm_in), graph(graph_in), unused(unused_in), prunable(prunable_in) { + populateInit(graph, unused, &init, &initDS, &v_by_index); + populateAccepts(graph, unused, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; if (initDS == init) { @@ -496,8 +491,7 @@ class Automaton_Graph { cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { dynamic_bitset<> temp(NFA_STATE_LIMIT); - markToppableStarts(graph, state_ids, single_trigger, triggers, - &temp); + markToppableStarts(graph, unused, single_trigger, triggers, &temp); toppable = bitfield(temp); } } @@ -557,7 +551,7 @@ class Automaton_Graph { const ReportManager *rm; public: const NGHolder &graph; - const ue2::unordered_map &state_ids; + const flat_set &unused; vector v_by_index; vector cr_by_index; /* pre alpha'ed */ StateSet init; @@ -580,20 +574,15 @@ class Automaton_Graph { } // namespace -unique_ptr buildMcClellan(const NGHolder &g, const ReportManager *rm, - bool single_trigger, +unique_ptr buildMcClellan(const NGHolder &graph, + const ReportManager *rm, bool single_trigger, const vector> &triggers, const Grey &grey, bool finalChance) { if (!grey.allowMcClellan) { return nullptr; } - // Construct a mutable copy of the graph so that we can drop unused starts. - auto g_copy = cloneHolder(g); - NGHolder &graph = *g_copy; - - auto state_ids = numberStates(graph); - dropUnusedStarts(graph, state_ids); + auto unused = findUnusedStates(graph); DEBUG_PRINTF("attempting to build ?%d? 
mcclellan\n", (int)graph.kind); assert(allMatchStatesHaveReports(graph)); @@ -620,7 +609,7 @@ unique_ptr buildMcClellan(const NGHolder &g, const ReportManager *rm, if (numStates <= NFA_STATE_LIMIT) { /* Fast path. Automaton_Graph uses a bitfield internally to represent * states and is quicker than Automaton_Big. */ - Automaton_Graph n(rm, graph, state_ids, single_trigger, triggers, + Automaton_Graph n(rm, graph, unused, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); @@ -633,8 +622,7 @@ unique_ptr buildMcClellan(const NGHolder &g, const ReportManager *rm, rdfa->alpha_remap = n.alpha; } else { /* Slow path. Too many states to use Automaton_Graph. */ - Automaton_Big n(rm, graph, state_ids, single_trigger, triggers, - prunable); + Automaton_Big n(rm, graph, unused, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ diff --git a/src/nfagraph/ng_mcclellan_internal.h b/src/nfagraph/ng_mcclellan_internal.h index 13d32aa48..22fcf01e6 100644 --- a/src/nfagraph/ng_mcclellan_internal.h +++ b/src/nfagraph/ng_mcclellan_internal.h @@ -64,8 +64,7 @@ void getFullTransitionFromState(const raw_dfa &n, u16 state, u16 *out_table); /** produce a map of states on which it is valid to receive tops */ -void markToppableStarts(const NGHolder &g, - const ue2::unordered_map &state_ids, +void markToppableStarts(const NGHolder &g, const flat_set &unused, bool single_trigger, const std::vector> &triggers, boost::dynamic_bitset<> *out); @@ -76,7 +75,7 @@ void transition_graph(autom &nfa, const std::vector &vByStateId, typename autom::StateSet *next) { typedef typename autom::StateSet StateSet; const NGHolder &graph = nfa.graph; - const auto &state_ids = nfa.state_ids; + const auto &unused = nfa.unused; const auto &alpha = nfa.alpha; const StateSet &squash = nfa.squash; const std::map &squash_mask = nfa.squash_mask; @@ -94,7 +93,7 @@ void transition_graph(autom &nfa, const std::vector &vByStateId, NFAVertex u = vByStateId[i]; for (const auto &v : adjacent_vertices_range(u, graph)) { - if (state_ids.at(v) == NO_STATE) { + if (contains(unused, v)) { continue; } succ.set(graph[v].index); diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index 703443561..09abf775a 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -281,6 +281,17 @@ void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states) { } } +flat_set findUnusedStates(const NGHolder &g) { + flat_set dead; + if (startIsRedundant(g)) { + dead.insert(g.start); + } + if (proper_out_degree(g.startDs, g) == 0) { + dead.insert(g.startDs); + } + return dead; +} + /** Construct a reversed copy of an arbitrary NGHolder, mapping starts to * accepts. */ void reverseHolder(const NGHolder &g_in, NGHolder &g) { diff --git a/src/nfagraph/ng_restructuring.h b/src/nfagraph/ng_restructuring.h index ce95b96cd..5e244bf67 100644 --- a/src/nfagraph/ng_restructuring.h +++ b/src/nfagraph/ng_restructuring.h @@ -81,6 +81,13 @@ u32 countStates(const NGHolder &g, /** Optimisation: drop unnecessary start states. */ void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states); +/** + * \brief Returns a set of vertices that will not participate in an + * implementation (NFA, DFA etc) of this graph. For example, starts with no + * successors. 
+ */ +flat_set findUnusedStates(const NGHolder &g); + } // namespace ue2 #endif From 18cfd11c0f6d748823faf1abc92b619793b1385d Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 29 Mar 2016 16:39:58 +1100 Subject: [PATCH 121/218] remove ENABLE_TOOLS_THREADS --- cmake/config.h.in | 3 --- 1 file changed, 3 deletions(-) diff --git a/cmake/config.h.in b/cmake/config.h.in index 9c8f8a275..aac8f64b3 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -15,9 +15,6 @@ /* internal build, switch on dump support. */ #cmakedefine DUMP_SUPPORT -/* Build tools with threading support */ -#cmakedefine ENABLE_TOOLS_THREADS - /* Define to 1 if `backtrace' works. */ #cmakedefine HAVE_BACKTRACE From 7a6c6f46ed4db4663be5531b2af10704c7f654ab Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 29 Mar 2016 16:41:27 +1100 Subject: [PATCH 122/218] remove HAVE_PTHREAD_H --- cmake/config.h.in | 3 --- 1 file changed, 3 deletions(-) diff --git a/cmake/config.h.in b/cmake/config.h.in index aac8f64b3..75c27b3e2 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -52,9 +52,6 @@ /* Define to 1 if `posix_memalign' works. */ #cmakedefine HAVE_POSIX_MEMALIGN -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_PTHREAD_H - /* Define to 1 if you have the `setrlimit' function. */ #cmakedefine HAVE_SETRLIMIT From e034ccb9b4d4e8d0528f19add1b0be38e916c4b0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 5 Apr 2016 15:18:33 +1000 Subject: [PATCH 123/218] mmbit_iterate_bounded: make block_width u64a --- src/util/multibit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/multibit.h b/src/util/multibit.h index 771c158d2..b2393f3f9 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -742,7 +742,7 @@ u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 for (;;) { assert(level <= max_level); - u32 block_width = MMB_KEY_BITS << ks; + u64a block_width = MMB_KEY_BITS << ks; u64a block_base = key * block_width; u64a block_min = MAX(it_start, block_base); u64a block_max = MIN(it_end, block_base + block_width - 1); From dd4c1eceb80f9f76551e6a4d37a39744608087f3 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 5 Apr 2016 10:13:34 +1000 Subject: [PATCH 124/218] Remove unused loadu2x128 --- src/util/simd_utils.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 8176e7e13..e115aa7ae 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -601,17 +601,6 @@ static really_inline m256 loadu256(const void *ptr) { #endif } -// unaligned load of 128-bit value to low and high part of 256-bit value -static really_inline m256 loadu2x128(const void *ptr) { -#if defined(__AVX2__) - return set2x128(loadu128(ptr)); -#else - m256 rv; - rv.hi = rv.lo = loadu128(ptr); - return rv; -#endif -} - // packed unaligned store of first N bytes static really_inline void storebytes256(void *ptr, m256 a, unsigned int n) { From 088347b437dc10b0e899c288c92627dab1b37cb5 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 5 Apr 2016 10:36:50 +1000 Subject: [PATCH 125/218] Remove unused storeu macros --- src/util/uniform_ops.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/util/uniform_ops.h b/src/util/uniform_ops.h index 5fa6ab56c..45ea41081 100644 --- a/src/util/uniform_ops.h +++ b/src/util/uniform_ops.h @@ -76,9 +76,6 @@ #define storeu_u32(ptr, a) unaligned_store_u32(ptr, a) #define storeu_u64a(ptr, a) unaligned_store_u64a(ptr, a) #define storeu_m128(ptr, a) storeu128(ptr, a) -#define 
storeu_m256(ptr, a) storeu256(ptr, a) -#define storeu_m384(ptr, a) storeu384(ptr, a) -#define storeu_m512(ptr, a) storeu512(ptr, a) #define zero_u8 0 #define zero_u32 0 From 3dc8bab78fecd9fc69a329c2359cab3288e11a00 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 6 Apr 2016 11:09:08 +1000 Subject: [PATCH 126/218] Build convenience libraries as static libs --- unit/CMakeLists.txt | 2 +- util/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 9afc95731..b3cc8cea2 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -25,7 +25,7 @@ if(CXX_WUNUSED_VARIABLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable") endif() -add_library(gtest ${gtest_SOURCES}) +add_library(gtest STATIC ${gtest_SOURCES}) add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR}) diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index c664ec25c..dc7313220 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -16,7 +16,7 @@ set(expressionutil_SRCS ExpressionParser.h ExpressionParser.cpp ) -add_library(expressionutil ${expressionutil_SRCS}) +add_library(expressionutil STATIC ${expressionutil_SRCS}) add_dependencies(expressionutil ragel_ExpressionParser) SET(corpusomatic_SRCS @@ -29,5 +29,5 @@ SET(corpusomatic_SRCS ng_find_matches.h ng_find_matches.cpp ) -add_library(corpusomatic ${corpusomatic_SRCS}) +add_library(corpusomatic STATIC ${corpusomatic_SRCS}) From 9852ac0091f3e91461caff123235fa7f68f401c6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 6 Apr 2016 14:44:22 +1000 Subject: [PATCH 127/218] mpv: use size_t for count, not u32 Small cleanup in processReportsForRange. --- src/nfa/mpv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfa/mpv.c b/src/nfa/mpv.c index 5f4910a65..7ae15d245 100644 --- a/src/nfa/mpv.c +++ b/src/nfa/mpv.c @@ -176,7 +176,7 @@ char processReportsForRange(const struct mpv *m, u8 *reporters, return MO_CONTINUE_MATCHING; } - for (u32 i = 2; i <= length; i++) { + for (size_t i = 2; i <= length; i++) { for (u32 j = 0; j < rl_count; j++) { if (cb(first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) { DEBUG_PRINTF("bailing\n"); From 5354b7a5ca559232d4ad99f2394921ce776341e0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 6 Apr 2016 15:06:48 +1000 Subject: [PATCH 128/218] mpv: fire only one report when simple-exhaustible --- src/nfa/mpv.c | 6 ++++-- src/nfa/mpv_dump.cpp | 5 ++++- src/nfa/mpv_internal.h | 11 ++++++++++- src/nfa/mpvcompile.cpp | 9 ++++----- src/nfa/mpvcompile.h | 9 ++++++--- src/nfagraph/ng_puff.cpp | 10 +++++++--- 6 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/nfa/mpv.c b/src/nfa/mpv.c index 7ae15d245..4bae7b186 100644 --- a/src/nfa/mpv.c +++ b/src/nfa/mpv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -124,7 +124,7 @@ char processReports(const struct mpv *m, u8 *reporters, DEBUG_PRINTF("report %u at %llu\n", curr->report, report_offset); - if (curr->unbounded) { + if (curr->unbounded && !curr->simple_exhaust) { assert(rl_count < m->puffette_count); *rl = curr->report; ++rl; @@ -176,6 +176,8 @@ char processReportsForRange(const struct mpv *m, u8 *reporters, return MO_CONTINUE_MATCHING; } + DEBUG_PRINTF("length=%zu, rl_count=%u\n", length, rl_count); + for (size_t i = 2; i <= length; i++) { 
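+        /* rl[] was filled by processReports() for the first byte of the
+         * run; it now holds only the reports that must be replayed on every
+         * byte, as simple-exhaustible reports are delivered once only. */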
for (u32 j = 0; j < rl_count; j++) { if (cb(first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) { diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp index e731df875..504cc6775 100644 --- a/src/nfa/mpv_dump.cpp +++ b/src/nfa/mpv_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -108,6 +108,9 @@ void dumpKilo(FILE *f, const mpv *m, const mpv_kilopuff *k) { fprintf(f, " Puffette %u\n", i); fprintf(f, " repeats: %u%s\n", p[i].repeats, p[i].unbounded ? "," : ""); + if (p[i].simple_exhaust) { + fprintf(f, " simple exhaustible\n"); + } fprintf(f, " report id: %u\n", p[i].report); } diff --git a/src/nfa/mpv_internal.h b/src/nfa/mpv_internal.h index 7a1a22736..a52853dce 100644 --- a/src/nfa/mpv_internal.h +++ b/src/nfa/mpv_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,15 @@ struct mpv_puffette { u32 repeats; char unbounded; + + /** + * \brief Report is simple-exhaustible. + * + * If this is true, we do best-effort suppression of runs of reports, only + * delivering the first one. + */ + char simple_exhaust; + ReportID report; }; diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index e4741ef13..7521afef8 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -53,10 +53,8 @@ namespace ue2 { namespace { struct pcomp { bool operator()(const raw_puff &a, const raw_puff &b) const { - ORDER_CHECK(repeats); - ORDER_CHECK(unbounded); - ORDER_CHECK(report); - return false; + return tie(a.repeats, a.unbounded, a.simple_exhaust, a.report) < + tie(b.repeats, b.unbounded, b.simple_exhaust, b.report); } }; @@ -89,6 +87,7 @@ void writePuffette(mpv_puffette *out, const raw_puff &rp) { rp.report, out); out->repeats = rp.repeats; out->unbounded = rp.unbounded; + out->simple_exhaust = rp.simple_exhaust; out->report = rp.report; } diff --git a/src/nfa/mpvcompile.h b/src/nfa/mpvcompile.h index 01c0de79e..ff4906ee7 100644 --- a/src/nfa/mpvcompile.h +++ b/src/nfa/mpvcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,12 +42,15 @@ namespace ue2 { struct raw_puff { raw_puff(u32 repeats_in, bool unbounded_in, ReportID report_in, - const CharReach &reach_in, bool auto_restart_in = false) + const CharReach &reach_in, bool auto_restart_in = false, + bool simple_exhaust_in = false) : repeats(repeats_in), unbounded(unbounded_in), - auto_restart(auto_restart_in), report(report_in), reach(reach_in) {} + auto_restart(auto_restart_in), simple_exhaust(simple_exhaust_in), + report(report_in), reach(reach_in) {} u32 repeats; /**< report match after this many matching bytes */ bool unbounded; /**< keep producing matches after repeats are reached */ bool auto_restart; /**< for /[^X]{n}/ type 
patterns */ + bool simple_exhaust; /* first report will exhaust us */ ReportID report; CharReach reach; /**< = ~escapes */ }; diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index 501d8f7b2..c8b6843d7 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -270,12 +270,16 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, DEBUG_PRINTF("constructing Puff for report %u\n", report); DEBUG_PRINTF("a = %u\n", g[a].index); + const Report &puff_report = rm.getReport(report); + const bool simple_exhaust = isSimpleExhaustible(puff_report); + const bool pureAnchored = a == g.start && singleStart(g); if (!pureAnchored) { if (a == g.startDs || a == g.start) { DEBUG_PRINTF("add outfix ar(false)\n"); - raw_puff rp(width, unbounded, report, cr, auto_restart); + raw_puff rp(width, unbounded, report, cr, auto_restart, + simple_exhaust); rose.addOutfix(rp); return; } @@ -300,7 +304,7 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, } else { DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart); assert(!auto_restart || unbounded); - raw_puff rp(width, unbounded, report, cr, auto_restart); + raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust); rose.addOutfix(rp); } } From 1fcd3cfb2cdb60ac9479f2ae0732c942da6a3a4f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Apr 2016 16:19:55 +1000 Subject: [PATCH 129/218] mmbit_all: True if all keys are switched on --- src/util/multibit.h | 80 ++++++++++++++++++++++++++++++++++++- unit/internal/multi_bit.cpp | 20 +++++++++- 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/src/util/multibit.h b/src/util/multibit.h index b2393f3f9..ddc8bbdd0 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -664,6 +664,84 @@ char mmbit_any_precise(const u8 *bits, u32 total_bits) { return mmbit_iterate_big(bits, total_bits, MMB_INVALID) != MMB_INVALID; } +static really_inline +char mmbit_all_flat(const u8 *bits, u32 total_bits) { + while (total_bits > MMB_KEY_BITS) { + if (mmb_load(bits) != MMB_ALL_ONES) { + return 0; + } + bits += sizeof(MMB_TYPE); + total_bits -= MMB_KEY_BITS; + } + while (total_bits > 8) { + if (*bits != 0xff) { + return 0; + } + bits++; + total_bits -= 8; + } + u8 mask = (u8)mmb_mask_zero_to_nocheck(total_bits); + return (*bits & mask) == mask; +} + +static really_inline +char mmbit_all_big(const u8 *bits, u32 total_bits) { + u32 ks = mmbit_keyshift(total_bits); + + u32 level = 0; + for (;;) { + // Number of bits we expect to see switched on on this level. + u32 level_bits; + if (ks != 0) { + u32 next_level_width = MMB_KEY_BITS << (ks - MMB_KEY_SHIFT); + level_bits = ROUNDUP_N(total_bits, next_level_width) >> ks; + } else { + level_bits = total_bits; + } + + const u8 *block_ptr = mmbit_get_level_root_const(bits, level); + + // All full-size blocks should be all-ones. 
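+        // (each bit at an upper level summarises a subtree of keys, so a
+        // clear bit anywhere lets us return 0 without descending further)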
+ while (level_bits >= MMB_KEY_BITS) { + MMB_TYPE block = mmb_load(block_ptr); + if (block != MMB_ALL_ONES) { + return 0; + } + block_ptr += sizeof(MMB_TYPE); + level_bits -= MMB_KEY_BITS; + } + + // If we have bits remaining, we have a runt block on the end. + if (level_bits > 0) { + MMB_TYPE block = mmb_load(block_ptr); + MMB_TYPE mask = mmb_mask_zero_to_nocheck(level_bits); + if ((block & mask) != mask) { + return 0; + } + } + + if (ks == 0) { + break; + } + + ks -= MMB_KEY_SHIFT; + level++; + } + + return 1; +} + +/** \brief True if all keys are on. Guaranteed precise. */ +static really_inline +char mmbit_all(const u8 *bits, u32 total_bits) { + MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits); + + if (mmbit_is_flat_model(total_bits)) { + return mmbit_all_flat(bits, total_bits); + } + return mmbit_all_big(bits, total_bits); +} + static really_inline MMB_TYPE get_flat_masks(u32 base, u32 it_start, u32 it_end) { if (it_end <= base) { diff --git a/unit/internal/multi_bit.cpp b/unit/internal/multi_bit.cpp index 3f5c59085..925092b3d 100644 --- a/unit/internal/multi_bit.cpp +++ b/unit/internal/multi_bit.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -566,6 +566,24 @@ TEST_P(MultiBitTest, Any) { } } +TEST_P(MultiBitTest, All) { + SCOPED_TRACE(test_size); + ASSERT_TRUE(ba != nullptr); + + mmbit_clear(ba, test_size); + ASSERT_FALSE(mmbit_all(ba, test_size)); + + for (u64a i = 0; i < test_size - 1; i += stride) { + SCOPED_TRACE(i); + mmbit_set(ba, test_size, i); + ASSERT_FALSE(mmbit_all(ba, test_size)); + } + + // Set all bits. + fill_mmbit(ba, test_size); + ASSERT_TRUE(mmbit_all(ba, test_size)); +} + TEST_P(MultiBitTest, UnsetRange1) { SCOPED_TRACE(test_size); ASSERT_TRUE(ba != nullptr); From f2c0a66b6f91655ad6b287822879403cfc94b39b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Apr 2016 16:04:28 +1000 Subject: [PATCH 130/218] Rose: use a multibit for the exhaustion vector Previously, the exhaustion vector was a standard bitvector, which required an expensive memset() call at init for databases with a large number of exhaustion keys. 
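For reference, a minimal before/after sketch of the evec operations, using
the names that appear in the diff below (illustrative only, not a drop-in
hunk):

    /* before: flat bitvector, one bit per ekey */
    memset(evec, 0, (t->ekeyCount + 7) / 8);   /* clearEvec */
    evec[eoff >> 3] |= 1 << (eoff % 8);        /* markAsMatched */

    /* after: all operations go through the multibit API */
    mmbit_clear((u8 *)evec, t->ekeyCount);     /* clearEvec */
    mmbit_set((u8 *)evec, t->ekeyCount, eoff); /* markAsMatched */
    mmbit_all((const u8 *)evec, t->ekeyCount); /* isAllExhausted */

This also allows isAllExhausted() to reuse mmbit_all() from the previous
patch in place of the hand-rolled byte loop.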
--- src/report.h | 19 ++++++++------ src/rose/program_runtime.h | 2 +- src/rose/rose_build_bytecode.cpp | 4 +-- src/rose/runtime.h | 2 +- src/util/exhaust.h | 44 +++++++++----------------------- 5 files changed, 27 insertions(+), 44 deletions(-) diff --git a/src/report.h b/src/report.h index 96cea32e9..6f5cec1bf 100644 --- a/src/report.h +++ b/src/report.h @@ -243,7 +243,8 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, } } - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ir->ekey))) { + if (!is_simple && + unlikely(isExhausted(ci->rose, ci->exhaustionVector, ir->ekey))) { DEBUG_PRINTF("ate exhausted match\n"); return MO_CONTINUE_MATCHING; } @@ -296,7 +297,7 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, } if (!is_simple && ir->ekey != END_EXHAUST) { - markAsMatched(ci->exhaustionVector, ir->ekey); + markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; @@ -338,7 +339,8 @@ int roseDeliverReport(u64a offset, UNUSED ReportID id, ReportID onmatch, assert(!ir->quashSom); #endif - assert(ekey == INVALID_EKEY || !isExhausted(ci->exhaustionVector, ekey)); + assert(ekey == INVALID_EKEY || + !isExhausted(ci->rose, ci->exhaustionVector, ekey)); u64a from_offset = 0; u64a to_offset = offset + offset_adjust; @@ -355,7 +357,7 @@ int roseDeliverReport(u64a offset, UNUSED ReportID id, ReportID onmatch, } if (ekey != INVALID_EKEY) { - markAsMatched(ci->exhaustionVector, ekey); + markAsMatched(ci->rose, ci->exhaustionVector, ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; @@ -398,7 +400,8 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, int halt = 0; - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ir->ekey))) { + if (!is_simple && + unlikely(isExhausted(ci->rose, ci->exhaustionVector, ir->ekey))) { DEBUG_PRINTF("ate exhausted match\n"); goto exit; } @@ -444,7 +447,7 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, flags, ci->userContext); if (!is_simple) { - markAsMatched(ci->exhaustionVector, ir->ekey); + markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); } exit: @@ -485,7 +488,7 @@ int roseDeliverSomReport(u64a from_offset, u64a to_offset, assert(!ir->hasBounds || (to_offset >= ir->minOffset && to_offset <= ir->maxOffset)); assert(ir->ekey == INVALID_EKEY || - !isExhausted(ci->exhaustionVector, ir->ekey)); + !isExhausted(ci->rose, ci->exhaustionVector, ir->ekey)); to_offset += ir->offsetAdjust; assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); @@ -509,7 +512,7 @@ int roseDeliverSomReport(u64a from_offset, u64a to_offset, if (is_exhaustible) { assert(ir->ekey != INVALID_EKEY); - markAsMatched(ci->exhaustionVector, ir->ekey); + markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index d816d62e1..2dd3ba8bf 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1146,7 +1146,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, assert(ri->ekey != INVALID_EKEY); assert(ri->ekey < t->ekeyCount); const char *evec = scratch->core_info.exhaustionVector; - if (isExhausted(evec, ri->ekey)) { + if (isExhausted(t, evec, ri->ekey)) { DEBUG_PRINTF("ekey %u already set, match is exhausted\n", ri->ekey); assert(ri->fail_jump); // must progress diff --git 
a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index bcf42eed7..7fe29538a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -666,9 +666,9 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, so->history = curr_offset; curr_offset += historyRequired; - // Exhausted bit vector. + // Exhaustion multibit. so->exhausted = curr_offset; - curr_offset += ROUNDUP_N(tbi.rm.numEkeys(), 8) / 8; + curr_offset += mmbit_size(tbi.rm.numEkeys()); // SOM locations and valid/writeable multibit structures. if (tbi.ssm.numSomSlots()) { diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 275adfb4a..46ccc2a13 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -123,7 +123,7 @@ char roseSuffixInfoIsExhausted(const struct RoseEngine *t, const u32 *ekeys = (const u32 *)((const char *)t + info->ekeyListOffset); while (*ekeys != END_EXHAUST) { DEBUG_PRINTF("check %u\n", *ekeys); - if (!isExhausted(exhausted, *ekeys)) { + if (!isExhausted(t, exhausted, *ekeys)) { DEBUG_PRINTF("not exhausted -> alive\n"); return 0; } diff --git a/src/util/exhaust.h b/src/util/exhaust.h index e75d18092..3b5bff4ed 100644 --- a/src/util/exhaust.h +++ b/src/util/exhaust.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,8 +34,8 @@ #define EXHAUST_H #include "rose/rose_internal.h" +#include "util/multibit.h" #include "ue2common.h" -#include /** \brief Sentinel value meaning no further exhaustion keys. */ #define END_EXHAUST (~(u32)0) @@ -43,56 +43,36 @@ /** \brief Test whether the given key (\a eoff) is set in the exhaustion vector * \a evec. */ static really_inline -int isExhausted(const char *evec, u32 eoff) { +int isExhausted(const struct RoseEngine *t, const char *evec, u32 eoff) { DEBUG_PRINTF("checking exhaustion %p %u\n", evec, eoff); - return eoff != END_EXHAUST && (evec[eoff >> 3] & (1 << (eoff % 8))); + return eoff != END_EXHAUST && + mmbit_isset((const u8 *)evec, t->ekeyCount, eoff); } /** \brief Returns 1 if all exhaustion keys in the bitvector are on. */ static really_inline -int isAllExhausted(const struct RoseEngine *t, const char *evec_in) { +int isAllExhausted(const struct RoseEngine *t, const char *evec) { if (!t->canExhaust) { return 0; /* pattern set is inexhaustible */ } - const u8 *evec = (const u8 *)evec_in; - - u32 whole_bytes = t->ekeyCount / 8; - for (u32 i = 0; i < whole_bytes; i++) { - if (evec[i] != 0xff) { - DEBUG_PRINTF("unexhausted pattern in byte %u\n", i); - return 0; - } - } - - u32 rem = t->ekeyCount % 8; - if (t->ekeyCount % 8) { - u8 mask = (1 << rem) - 1; - if (evec[whole_bytes] != (char)mask) { - DEBUG_PRINTF("unexhausted pattern (%hhu) in final byte\n", mask); - return 0; - } - } - - DEBUG_PRINTF("pattern set is exhausted\n"); - return 1; + return mmbit_all((const u8 *)evec, t->ekeyCount); } /** \brief Mark key \a eoff on in the exhaustion vector. */ static really_inline -void markAsMatched(char *evec, u32 eoff) { +void markAsMatched(const struct RoseEngine *t, char *evec, u32 eoff) { if (eoff != END_EXHAUST) { DEBUG_PRINTF("marking as exhausted key %u\n", eoff); - evec[eoff >> 3] |= 1 << (eoff % 8); + mmbit_set((u8 *)evec, t->ekeyCount, eoff); } } /** \brief Clear all keys in the exhaustion vector. 
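 * (this is the init-time clear the commit message refers to; it now calls
 * mmbit_clear() instead of sizing and zeroing the byte array by hand)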
*/ static really_inline -void clearEvec(char *ev, const struct RoseEngine *t) { - size_t size = (t->ekeyCount + 7) / 8; - DEBUG_PRINTF("clearing evec %p %zu\n", ev, size); - memset(ev, 0, size); +void clearEvec(char *evec, const struct RoseEngine *t) { + DEBUG_PRINTF("clearing evec %p %u\n", evec, t->ekeyCount); + mmbit_clear((u8 *)evec, t->ekeyCount); } #endif From d75cc809fe3a3af12229ce5bd1db1eae20dc79aa Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 5 Apr 2016 13:25:39 +1000 Subject: [PATCH 131/218] Runtime: be more careful with status bits --- src/runtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime.c b/src/runtime.c index 8a0cb9f98..09c0deb68 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -667,7 +667,7 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, char *state = getMultiState(id); u8 status = getStreamStatus(state); - if (status == STATUS_TERMINATED || status == STATUS_EXHAUSTED) { + if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED)) { DEBUG_PRINTF("stream is broken, just freeing storage\n"); return; } @@ -802,7 +802,7 @@ void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { DEBUG_PRINTF("stream exhausted\n"); - scratch->core_info.status = STATUS_EXHAUSTED; + scratch->core_info.status |= STATUS_EXHAUSTED; } } From 50885f210a23f360dbc0043d56438727c573bdaf Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 7 Apr 2016 16:33:11 +1000 Subject: [PATCH 132/218] exhaust: Update interface - Only use functions in exhaust.h for valid ekeys - Use INVALID_EKEY everywhere (remove dupe END_EXHAUST sentinel) --- src/report.h | 8 ++++---- src/rose/runtime.h | 4 ++-- src/runtime.c | 4 ++-- src/util/exhaust.h | 29 ++++++++++++++--------------- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/report.h b/src/report.h index 6f5cec1bf..28560907c 100644 --- a/src/report.h +++ b/src/report.h @@ -243,7 +243,7 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, } } - if (!is_simple && + if (!is_simple && ir->ekey != INVALID_EKEY && unlikely(isExhausted(ci->rose, ci->exhaustionVector, ir->ekey))) { DEBUG_PRINTF("ate exhausted match\n"); return MO_CONTINUE_MATCHING; @@ -296,7 +296,7 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, return MO_HALT_MATCHING; } - if (!is_simple && ir->ekey != END_EXHAUST) { + if (!is_simple && ir->ekey != INVALID_EKEY) { markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); return MO_CONTINUE_MATCHING; } else { @@ -400,7 +400,7 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, int halt = 0; - if (!is_simple && + if (!is_simple && ir->ekey != INVALID_EKEY && unlikely(isExhausted(ci->rose, ci->exhaustionVector, ir->ekey))) { DEBUG_PRINTF("ate exhausted match\n"); goto exit; @@ -446,7 +446,7 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, halt = ci->userCallback((unsigned int)ir->onmatch, from_offset, to_offset, flags, ci->userContext); - if (!is_simple) { + if (!is_simple && ir->ekey != INVALID_EKEY) { markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); } diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 46ccc2a13..2716c3fa2 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -119,9 +119,9 @@ char roseSuffixInfoIsExhausted(const struct RoseEngine *t, DEBUG_PRINTF("check exhaustion -> start at %u\n", info->ekeyListOffset); - /* END_EXHAUST terminated 
list */ + /* INVALID_EKEY terminated list */ const u32 *ekeys = (const u32 *)((const char *)t + info->ekeyListOffset); - while (*ekeys != END_EXHAUST) { + while (*ekeys != INVALID_EKEY) { DEBUG_PRINTF("check %u\n", *ekeys); if (!isExhausted(t, exhausted, *ekeys)) { DEBUG_PRINTF("not exhausted -> alive\n"); diff --git a/src/runtime.c b/src/runtime.c index 09c0deb68..6bc601412 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -428,7 +428,7 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data, length, NULL, 0, 0, 0, flags); - clearEvec(scratch->core_info.exhaustionVector, rose); + clearEvec(rose, scratch->core_info.exhaustionVector); // Rose program execution (used for some report paths) depends on these // values being initialised. @@ -561,7 +561,7 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose) { setStreamStatus(state, 0); roseInitState(rose, state); - clearEvec((char *)state + rose->stateOffsets.exhausted, rose); + clearEvec(rose, state + rose->stateOffsets.exhausted); // SOM state multibit structures. initSomState(rose, state); diff --git a/src/util/exhaust.h b/src/util/exhaust.h index 3b5bff4ed..f7b7d6e95 100644 --- a/src/util/exhaust.h +++ b/src/util/exhaust.h @@ -34,19 +34,18 @@ #define EXHAUST_H #include "rose/rose_internal.h" +#include "util/internal_report.h" #include "util/multibit.h" #include "ue2common.h" -/** \brief Sentinel value meaning no further exhaustion keys. */ -#define END_EXHAUST (~(u32)0) - -/** \brief Test whether the given key (\a eoff) is set in the exhaustion vector +/** \brief Test whether the given key (\a ekey) is set in the exhaustion vector * \a evec. */ static really_inline -int isExhausted(const struct RoseEngine *t, const char *evec, u32 eoff) { - DEBUG_PRINTF("checking exhaustion %p %u\n", evec, eoff); - return eoff != END_EXHAUST && - mmbit_isset((const u8 *)evec, t->ekeyCount, eoff); +int isExhausted(const struct RoseEngine *t, const char *evec, u32 ekey) { + DEBUG_PRINTF("checking exhaustion %p %u\n", evec, ekey); + assert(ekey != INVALID_EKEY); + assert(ekey < t->ekeyCount); + return mmbit_isset((const u8 *)evec, t->ekeyCount, ekey); } /** \brief Returns 1 if all exhaustion keys in the bitvector are on. */ @@ -59,18 +58,18 @@ int isAllExhausted(const struct RoseEngine *t, const char *evec) { return mmbit_all((const u8 *)evec, t->ekeyCount); } -/** \brief Mark key \a eoff on in the exhaustion vector. */ +/** \brief Mark key \a ekey on in the exhaustion vector. */ static really_inline -void markAsMatched(const struct RoseEngine *t, char *evec, u32 eoff) { - if (eoff != END_EXHAUST) { - DEBUG_PRINTF("marking as exhausted key %u\n", eoff); - mmbit_set((u8 *)evec, t->ekeyCount, eoff); - } +void markAsMatched(const struct RoseEngine *t, char *evec, u32 ekey) { + DEBUG_PRINTF("marking as exhausted key %u\n", ekey); + assert(ekey != INVALID_EKEY); + assert(ekey < t->ekeyCount); + mmbit_set((u8 *)evec, t->ekeyCount, ekey); } /** \brief Clear all keys in the exhaustion vector. 
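 * (the engine parameter now comes first, matching isExhausted() and
 * markAsMatched())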
*/ static really_inline -void clearEvec(char *evec, const struct RoseEngine *t) { +void clearEvec(const struct RoseEngine *t, char *evec) { DEBUG_PRINTF("clearing evec %p %u\n", evec, t->ekeyCount); mmbit_clear((u8 *)evec, t->ekeyCount); } From 956b86e97a2af565a7784e0bd0fb9097134a2822 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 7 Apr 2016 15:25:05 +1000 Subject: [PATCH 133/218] move MultibyteAccelInfo to accelcompile.h --- src/nfa/accelcompile.h | 22 ++++++++++++++++++++-- src/nfagraph/ng_limex_accel.h | 22 ++-------------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h index e9467531d..7bf7fe5dc 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,12 +32,30 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/ue2_containers.h" -#include "nfagraph/ng_limex_accel.h" union AccelAux; namespace ue2 { +struct MultibyteAccelInfo { + /* multibyte accel schemes, ordered by strength */ + enum multiaccel_type { + MAT_SHIFT, + MAT_SHIFTGRAB, + MAT_DSHIFT, + MAT_DSHIFTGRAB, + MAT_LONG, + MAT_LONGGRAB, + MAT_MAX, + MAT_NONE = MAT_MAX + }; + CharReach cr; + u32 offset = 0; + u32 len1 = 0; + u32 len2 = 0; + multiaccel_type type = MAT_NONE; +}; + struct AccelInfo { AccelInfo() : single_offset(0U), double_offset(0U), single_stops(CharReach::dot()), diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index 61dfaed99..b9dba2e1e 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +36,7 @@ #include "ng_holder.h" #include "ng_misc_opt.h" #include "ue2common.h" +#include "nfa/accelcompile.h" #include "util/charreach.h" #include "util/order_check.h" #include "util/ue2_containers.h" @@ -71,25 +72,6 @@ struct DoubleAccelInfo { DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v); -struct MultibyteAccelInfo { - /* multibyte accel schemes, ordered by strength */ - enum multiaccel_type { - MAT_SHIFT, - MAT_SHIFTGRAB, - MAT_DSHIFT, - MAT_DSHIFTGRAB, - MAT_LONG, - MAT_LONGGRAB, - MAT_MAX, - MAT_NONE = MAT_MAX - }; - CharReach cr; - u32 offset = 0; - u32 len1 = 0; - u32 len2 = 0; - multiaccel_type type = MAT_NONE; -}; - struct AccelScheme { AccelScheme(const CharReach &cr_in, u32 offset_in) : cr(cr_in), offset(offset_in) { From d12560115230e0a386305ca62f131d8801780813 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 21 Mar 2016 09:38:30 +1100 Subject: [PATCH 134/218] dump contents of double shufti masks --- src/nfa/accel_dump.cpp | 62 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 2370718ad..9e4fb7e97 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are 
met: @@ -37,16 +37,20 @@ #include "shufticompile.h" #include "trufflecompile.h" #include "ue2common.h" +#include "util/bitutils.h" #include "util/charreach.h" #include "util/dump_charclass.h" #include "util/dump_mask.h" #include +#include #ifndef DUMP_SUPPORT #error No dump support! #endif +using namespace std; + namespace ue2 { static @@ -146,6 +150,60 @@ void dumpShuftiCharReach(FILE *f, const m128 &lo, const m128 &hi) { describeClass(cr).c_str()); } +static +vector shufti2cr_array(const m128 lo_in, const m128 hi_in) { + const u8 *lo = (const u8 *)&lo_in; + const u8 *hi = (const u8 *)&hi_in; + vector crs(8); + for (u32 i = 0; i < 256; i++) { + u32 combined = lo[(u8)i & 0xf] & hi[(u8)i >> 4]; + while (combined) { + u32 j = findAndClearLSB_32(&combined); + crs.at(j).set(i); + } + } + return crs; +} + +static +void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1, + const m128 &lo2, const m128 &hi2) { + vector cr1 = shufti2cr_array(~lo1, ~hi1); + vector cr2 = shufti2cr_array(~lo2, ~hi2); + map > cr1_group; + assert(cr1.size() == 8 && cr2.size() == 8); + for (u32 i = 0; i < 8; i++) { + if (!cr1[i].any()) { + continue; + } + cr1_group[cr1[i]].insert(i); + } + map rev; + for (const auto &e : cr1_group) { + CharReach rhs; + for (u32 r : e.second) { + rhs |= cr2.at(r); + } + + rev[rhs] |= e.first; + } + fprintf(f, "escapes: {"); + for (auto it = rev.begin(); it != rev.end(); ++it) { + const auto &e = *it; + if (it != rev.begin()) { + fprintf(f, ", "); + } + + if (e.first.all()) { + fprintf(f, "%s", describeClass(e.second).c_str()); + } else { + fprintf(f, "%s%s", describeClass(e.second).c_str(), + describeClass(e.first).c_str()); + } + } + fprintf(f, "}\n"); +} + static void dumpShuftiMasks(FILE *f, const m128 &lo, const m128 &hi) { fprintf(f, "lo %s\n", @@ -201,6 +259,8 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { dumpShuftiMasks(f, accel.dshufti.lo1, accel.dshufti.hi1); fprintf(f, "mask 2\n"); dumpShuftiMasks(f, accel.dshufti.lo2, accel.dshufti.hi2); + dumpDShuftiCharReach(f, accel.dshufti.lo1, accel.dshufti.hi1, + accel.dshufti.lo2, accel.dshufti.hi2); break; case ACCEL_TRUFFLE: { fprintf(f, "\n"); From f53c093baa4686ae06310fd1dce05a40853ff6ca Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 16 Feb 2016 13:37:39 +1100 Subject: [PATCH 135/218] simple offset accel for mcclellan start state --- CMakeLists.txt | 2 + src/nfa/goughcompile.cpp | 38 ++- src/nfa/mcclellancompile.cpp | 330 +++++++------------------ src/nfa/mcclellancompile.h | 24 +- src/nfa/mcclellancompile_accel.cpp | 383 +++++++++++++++++++++++++++++ src/nfa/mcclellancompile_accel.h | 64 +++++ src/nfa/mcclellancompile_util.cpp | 61 ++++- src/nfa/mcclellancompile_util.h | 7 +- src/nfagraph/ng_limex_accel.cpp | 96 ++++---- src/nfagraph/ng_limex_accel.h | 3 + src/util/dump_charclass.h | 6 +- 11 files changed, 698 insertions(+), 316 deletions(-) create mode 100644 src/nfa/mcclellancompile_accel.cpp create mode 100644 src/nfa/mcclellancompile_accel.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 36267fc61..e1bd27943 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -596,6 +596,8 @@ SET (hs_SRCS src/nfa/mcclellan_internal.h src/nfa/mcclellancompile.cpp src/nfa/mcclellancompile.h + src/nfa/mcclellancompile_accel.cpp + src/nfa/mcclellancompile_accel.h src/nfa/mcclellancompile_util.cpp src/nfa/mcclellancompile_util.h src/nfa/limex_compile.cpp diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index d735c80ac..2ad3c6dd6 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -85,10 +85,11 @@ class gough_build_strat : public mcclellan_build_strat { vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const override; - void find_escape_strings(dstate_id_t this_idx, - escape_info *out) const override; + escape_info find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override { return sizeof(gough_accel); } - void buildAccel(dstate_id_t this_idx, void *accel_out) override; + void buildAccel(dstate_id_t this_idx, const escape_info &info, + void *accel_out) override; + u32 max_allowed_offset_accel() const override { return 0; } raw_som_dfa &rdfa; const GoughGraph ≫ @@ -1145,32 +1146,43 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, return gough_dfa; } -void gough_build_strat::find_escape_strings(dstate_id_t this_idx, - escape_info *out) const { +escape_info gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { + escape_info rv; if (!contains(accel_gough_info, this_idx)) { - out->outs = CharReach::dot(); - out->outs2_broken = true; - return; + rv.outs = CharReach::dot(); + rv.outs2_broken = true; + return rv; } - mcclellan_build_strat::find_escape_strings(this_idx, out); + rv = mcclellan_build_strat::find_escape_strings(this_idx); + + assert(!rv.offset); /* should have been limited by strat */ + if (rv.offset) { + rv.outs = CharReach::dot(); + rv.outs2_broken = true; + return rv; + } if (!accel_gough_info.at(this_idx).two_byte) { - out->outs2_broken = true; + rv.outs2_broken = true; } + + return rv; } -void gough_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) { +void gough_build_strat::buildAccel(dstate_id_t this_idx, const escape_info &info, + void *accel_out) { assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); gough_accel *accel = (gough_accel *)accel_out; /* build a plain accelaux so we can work out where we can get to */ - mcclellan_build_strat::buildAccel(this_idx, &accel->accel); + mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel); DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx, accel->accel.accel_type); if (accel->accel.accel_type == ACCEL_NONE) { return; } + assert(!accel->accel.generic.offset); assert(contains(accel_gough_info, this_idx)); accel->margin_dist = verify_u8(accel_gough_info.at(this_idx).margin); built_accel[accel] = this_idx; diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index f75d08b54..9b21b8c4a 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,8 @@ #include "accel.h" #include "grey.h" #include "mcclellan_internal.h" +#include "mcclellancompile_accel.h" +#include "mcclellancompile_util.h" #include "nfa_internal.h" #include "shufticompile.h" #include "trufflecompile.h" @@ -56,25 +58,18 @@ #include #include +#include + using namespace std; +using boost::adaptors::map_keys; namespace ue2 { -/* compile time accel defs */ -#define ACCEL_MAX_STOP_CHAR 160 /* larger than nfa, as we don't have a budget - and the 
nfa cheats on stop characters for - sets of states */ -#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ - - namespace /* anon */ { struct dstate_extra { - u16 daddytaken; - bool shermanState; - bool accelerable; - dstate_extra(void) : daddytaken(0), shermanState(false), - accelerable(false) {} + u16 daddytaken = 0; + bool shermanState = false; }; struct dfa_info { @@ -105,10 +100,6 @@ struct dfa_info { return extra[raw_id].shermanState; } - bool is_accel(dstate_id_t raw_id) const { - return extra[raw_id].accelerable; - } - size_t size(void) const { return states.size(); } }; @@ -135,6 +126,14 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) { return aux; } +static +bool double_byte_ok(const escape_info &info) { + return !info.outs2_broken + && info.outs2_single.count() + info.outs2.size() <= 8 + && info.outs2_single.count() < info.outs2.size() + && info.outs2_single.count() <= 2 && !info.outs2.empty(); +} + static void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { assert((size_t)succ_table % 2 == 0); @@ -186,75 +185,43 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { } } -void mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx, - escape_info *out) const { - const dstate &raw = rdfa.states[this_idx]; - const auto &alpha_remap = rdfa.alpha_remap; - - flat_set> outs2_local; - for (unsigned i = 0; i < N_CHARS; i++) { - outs2_local.clear(); - - if (raw.next[alpha_remap[i]] != this_idx) { - out->outs.set(i); - - DEBUG_PRINTF("next is %hu\n", raw.next[alpha_remap[i]]); - const dstate &raw_next = rdfa.states[raw.next[alpha_remap[i]]]; - - if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { - DEBUG_PRINTF("leads to report\n"); - out->outs2_broken = true; /* cannot accelerate over reports */ - } - - for (unsigned j = 0; !out->outs2_broken && j < N_CHARS; j++) { - if (raw_next.next[alpha_remap[j]] == raw.next[alpha_remap[j]]) { - continue; - } - - DEBUG_PRINTF("adding %02x %02x -> %hu to 2 \n", i, j, - raw_next.next[alpha_remap[j]]); - outs2_local.emplace((u8)i, (u8)j); - } +u32 mcclellan_build_strat::max_allowed_offset_accel() const { + return ACCEL_DFA_MAX_OFFSET_DEPTH; +} - if (outs2_local.size() > 8) { - DEBUG_PRINTF("adding %02x to outs2_single\n", i); - out->outs2_single.set(i); - } else { - insert(&out->outs2, outs2_local); - } - if (out->outs2.size() > 8) { - DEBUG_PRINTF("outs2 too big\n"); - out->outs2_broken = true; - } - } - } +escape_info mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) + const { + return find_mcclellan_escape_info(rdfa, this_idx, + max_allowed_offset_accel()); } /** builds acceleration schemes for states */ -void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) { +void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, + const escape_info &info, + void *accel_out) { AccelAux *accel = (AccelAux *)accel_out; - escape_info out; - find_escape_strings(this_idx, &out); + DEBUG_PRINTF("accelerations scheme has offset %u\n", info.offset); + accel->generic.offset = verify_u8(info.offset); - if (!out.outs2_broken && out.outs2_single.none() - && out.outs2.size() == 1) { + if (double_byte_ok(info) && info.outs2_single.none() + && info.outs2.size() == 1) { accel->accel_type = ACCEL_DVERM; - accel->dverm.c1 = out.outs2.begin()->first; - accel->dverm.c2 = out.outs2.begin()->second; + accel->dverm.c1 = info.outs2.begin()->first; + accel->dverm.c2 = info.outs2.begin()->second; DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); return; } - if 
(!out.outs2_broken && out.outs2_single.none() - && (out.outs2.size() == 2 || out.outs2.size() == 4)) { + if (double_byte_ok(info) && info.outs2_single.none() + && (info.outs2.size() == 2 || info.outs2.size() == 4)) { bool ok = true; - assert(!out.outs2.empty()); - u8 firstC = out.outs2.begin()->first & CASE_CLEAR; - u8 secondC = out.outs2.begin()->second & CASE_CLEAR; + assert(!info.outs2.empty()); + u8 firstC = info.outs2.begin()->first & CASE_CLEAR; + u8 secondC = info.outs2.begin()->second & CASE_CLEAR; - for (const pair &p : out.outs2) { + for (const pair &p : info.outs2) { if ((p.first & CASE_CLEAR) != firstC || (p.second & CASE_CLEAR) != secondC) { ok = false; @@ -271,12 +238,9 @@ void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) { } } - if (!out.outs2_broken && - (out.outs2_single.count() + out.outs2.size()) <= 8 && - out.outs2_single.count() < out.outs2.size() && - out.outs2_single.count() <= 2 && !out.outs2.empty()) { + if (double_byte_ok(info)) { accel->accel_type = ACCEL_DSHUFTI; - shuftiBuildDoubleMasks(out.outs2_single, out.outs2, + shuftiBuildDoubleMasks(info.outs2_single, info.outs2, &accel->dshufti.lo1, &accel->dshufti.hi1, &accel->dshufti.lo2, @@ -285,166 +249,46 @@ void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) { return; } - if (out.outs.none()) { + if (info.outs.none()) { accel->accel_type = ACCEL_RED_TAPE; DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape" " from which there is no escape\n", this_idx); return; } - if (out.outs.count() == 1) { + if (info.outs.count() == 1) { accel->accel_type = ACCEL_VERM; - accel->verm.c = out.outs.find_first(); + accel->verm.c = info.outs.find_first(); DEBUG_PRINTF("state %hu is vermicelli\n", this_idx); return; } - if (out.outs.count() == 2 && out.outs.isCaselessChar()) { + if (info.outs.count() == 2 && info.outs.isCaselessChar()) { accel->accel_type = ACCEL_VERM_NOCASE; - accel->verm.c = out.outs.find_first() & CASE_CLEAR; + accel->verm.c = info.outs.find_first() & CASE_CLEAR; DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx); return; } - if (out.outs.count() > ACCEL_MAX_FLOATING_STOP_CHAR) { + if (info.outs.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) { accel->accel_type = ACCEL_NONE; DEBUG_PRINTF("state %hu is too broad\n", this_idx); return; } accel->accel_type = ACCEL_SHUFTI; - if (-1 != shuftiBuildMasks(out.outs, &accel->shufti.lo, + if (-1 != shuftiBuildMasks(info.outs, &accel->shufti.lo, &accel->shufti.hi)) { DEBUG_PRINTF("state %hu is shufti\n", this_idx); return; } - assert(!out.outs.none()); + assert(!info.outs.none()); accel->accel_type = ACCEL_TRUFFLE; - truffleBuildMasks(out.outs, &accel->truffle.mask1, &accel->truffle.mask2); + truffleBuildMasks(info.outs, &accel->truffle.mask1, &accel->truffle.mask2); DEBUG_PRINTF("state %hu is truffle\n", this_idx); } -static -bool is_accel(const raw_dfa &raw, dstate_id_t sds_or_proxy, - dstate_id_t this_idx) { - if (!this_idx /* dead state is not accelerable */) { - return false; - } - - /* Note on report acceleration states: While we can't accelerate while we - * are spamming out callbacks, the QR code paths don't raise reports - * during scanning so they can accelerate report states. */ - - if (generates_callbacks(raw.kind) - && !raw.states[this_idx].reports.empty()) { - return false; - } - - size_t single_limit = this_idx == sds_or_proxy ? 
- ACCEL_MAX_FLOATING_STOP_CHAR : ACCEL_MAX_STOP_CHAR; - DEBUG_PRINTF("inspecting %hu/%hu: %zu\n", this_idx, sds_or_proxy, - single_limit); - - CharReach out; - for (u32 i = 0; i < N_CHARS; i++) { - if (raw.states[this_idx].next[raw.alpha_remap[i]] != this_idx) { - out.set(i); - } - } - - if (out.count() <= single_limit) { - DEBUG_PRINTF("state %hu should be accelerable %zu\n", this_idx, - out.count()); - return true; - } - - DEBUG_PRINTF("state %hu is not accelerable has %zu\n", this_idx, - out.count()); - - return false; -} - -static -bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { - u16 top_remap = raw.alpha_remap[TOP]; - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - if (i != top_remap && raw.states[s].next[i] == s) { - return true; - } - } - return false; -} - -static -dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { - if (raw.start_floating != DEAD_STATE) { - DEBUG_PRINTF("has floating start\n"); - return raw.start_floating; - } - - DEBUG_PRINTF("looking for SDS proxy\n"); - - dstate_id_t s = raw.start_anchored; - - if (has_self_loop(s, raw)) { - return s; - } - - u16 top_remap = raw.alpha_remap[TOP]; - - ue2::unordered_set seen; - while (true) { - seen.insert(s); - DEBUG_PRINTF("basis %hu\n", s); - - /* check if we are connected to a state with a self loop */ - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - dstate_id_t t = raw.states[s].next[i]; - if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) { - return t; - } - } - - /* find a neighbour to use as a basis for looking for the sds proxy */ - dstate_id_t t = DEAD_STATE; - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - dstate_id_t tt = raw.states[s].next[i]; - if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) { - t = tt; - break; - } - } - - if (t == DEAD_STATE) { - /* we were unable to find a state to use as a SDS proxy */ - return DEAD_STATE; - } - - s = t; - seen.insert(t); - } -} - -static -void populateAccelerationInfo(dfa_info &info, u32 *ac, const Grey &grey) { - *ac = 0; /* number of accelerable states */ - - if (!grey.accelerateDFA) { - return; - } - - dstate_id_t sds_proxy = get_sds_or_proxy(info.raw); - DEBUG_PRINTF("sds %hu\n", sds_proxy); - - for (size_t i = 0; i < info.size(); i++) { - if (is_accel(info.raw, sds_proxy, i)) { - ++*ac; - info.extra[i].accelerable = true; - } - } -} - static void populateBasicInfo(size_t state_size, const dfa_info &info, u32 total_size, u32 aux_offset, u32 accel_offset, @@ -625,6 +469,14 @@ void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, } } +static +void fillAccelOut(const map &accel_escape_info, + set *accel_states) { + for (dstate_id_t i : accel_escape_info | map_keys) { + accel_states->insert(i); + } +} + static size_t calcShermanRegionSize(const dfa_info &info) { size_t rv = 0; @@ -692,14 +544,14 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { static aligned_unique_ptr mcclellanCompile16(dfa_info &info, - const CompileContext &cc) { + const CompileContext &cc, + set *accel_states) { DEBUG_PRINTF("building mcclellan 16\n"); vector reports; /* index in ri for the appropriate report list */ vector reports_eod; /* as above */ ReportID arb; u8 single; - u32 accelCount; u8 alphaShift = info.getAlphaShift(); assert(alphaShift <= 8); @@ -713,7 +565,8 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, unique_ptr ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - populateAccelerationInfo(info, &accelCount, cc.grey); + map accel_escape_info + = 
populateAccelerationInfo(info.raw, info.strat, cc.grey); size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) * count_real_states; @@ -721,7 +574,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, size_t aux_size = sizeof(mstate_aux) * info.size(); size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); - size_t accel_size = info.strat.accelSize() * accelCount; + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + ri->getReportListSize(), 32); size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); @@ -736,7 +589,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, char *nfa_base = (char *)nfa.get(); populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, - accelCount, arb, single, nfa.get()); + accel_escape_info.size(), arb, single, nfa.get()); vector reportOffsets; @@ -769,12 +622,12 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets); - if (info.is_accel(i)) { + if (contains(accel_escape_info, i)) { this_aux->accel_offset = accel_offset; accel_offset += info.strat.accelSize(); assert(accel_offset + sizeof(NFA) <= sherman_offset); assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - info.strat.buildAccel(i, + info.strat.buildAccel(i, accel_escape_info.at(i), (void *)((char *)m + this_aux->accel_offset)); } } @@ -798,12 +651,12 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); - if (info.is_accel(i)) { + if (contains(accel_escape_info, i)) { this_aux->accel_offset = accel_offset; accel_offset += info.strat.accelSize(); assert(accel_offset + sizeof(NFA) <= sherman_offset); assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - info.strat.buildAccel(i, + info.strat.buildAccel(i, accel_escape_info.at(i), (void *)((char *)m + this_aux->accel_offset)); } @@ -836,6 +689,10 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, markEdges(nfa.get(), succ_table, info); + if (accel_states && nfa) { + fillAccelOut(accel_escape_info, accel_states); + } + return nfa; } @@ -874,7 +731,9 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table, } static -void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) { +void allocateFSN8(dfa_info &info, + const map &accel_escape_info, + u16 *accel_limit, u16 *accept_limit) { info.states[0].impl_id = 0; /* dead is always 0 */ vector norm; @@ -886,7 +745,7 @@ void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) { for (u32 i = 1; i < info.size(); i++) { if (!info.states[i].reports.empty()) { accept.push_back(i); - } else if (info.is_accel(i)) { + } else if (contains(accel_escape_info, i)) { accel.push_back(i); } else { norm.push_back(i); @@ -915,23 +774,24 @@ void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) { static aligned_unique_ptr mcclellanCompile8(dfa_info &info, - const CompileContext &cc) { + const CompileContext &cc, + set *accel_states) { DEBUG_PRINTF("building mcclellan 8\n"); vector reports; vector reports_eod; ReportID arb; u8 single; - u32 accelCount; unique_ptr ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - populateAccelerationInfo(info, &accelCount, cc.grey); + map accel_escape_info + = populateAccelerationInfo(info.raw, info.strat, cc.grey); size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); size_t aux_size = 
sizeof(mstate_aux) * info.size(); size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); - size_t accel_size = info.strat.accelSize() * accelCount; + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + ri->getReportListSize(), 32); size_t total_size = accel_offset + accel_size; @@ -951,9 +811,9 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); - allocateFSN8(info, &m->accel_limit_8, &m->accept_limit_8); + allocateFSN8(info, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8); populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, - accelCount, arb, single, nfa.get()); + accel_escape_info.size(), arb, single, nfa.get()); vector reportOffsets; @@ -964,13 +824,14 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); for (size_t i = 0; i < info.size(); i++) { - if (info.is_accel(i)) { + if (contains(accel_escape_info, i)) { u32 j = info.implId(i); aux[j].accel_offset = accel_offset; accel_offset += info.strat.accelSize(); - info.strat.buildAccel(i, (void *)((char *)m + aux[j].accel_offset)); + info.strat.buildAccel(i, accel_escape_info.at(i), + (void *)((char *)m + aux[j].accel_offset)); } fillInBasicState8(info, aux, succ_table, reportOffsets, reports, @@ -981,6 +842,10 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, DEBUG_PRINTF("rl size %zu\n", ri->size()); + if (accel_states && nfa) { + fillAccelOut(accel_escape_info, accel_states); + } + return nfa; } @@ -1163,15 +1028,6 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { return false; } -static -void fillAccelOut(const dfa_info &info, set *accel_states) { - for (size_t i = 0; i < info.size(); i++) { - if (info.is_accel(i)) { - accel_states->insert(i); - } - } -} - aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, const CompileContext &cc, set *accel_states) { @@ -1200,19 +1056,15 @@ aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, aligned_unique_ptr nfa; if (!using8bit) { - nfa = mcclellanCompile16(info, cc); + nfa = mcclellanCompile16(info, cc, accel_states); } else { - nfa = mcclellanCompile8(info, cc); + nfa = mcclellanCompile8(info, cc, accel_states); } if (has_eod_reports) { nfa->flags |= NFA_ACCEPTS_EOD; } - if (accel_states && nfa) { - fillAccelOut(info, accel_states); - } - DEBUG_PRINTF("compile done\n"); return nfa; } diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index 78126bc8d..d4b4325dc 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -59,6 +59,7 @@ struct escape_info { CharReach outs2_single; flat_set> outs2; bool outs2_broken = false; + u32 offset = 0; }; class dfa_build_strat { @@ -70,10 +71,10 @@ class dfa_build_strat { std::vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const = 0; - virtual void find_escape_strings(dstate_id_t this_idx, - escape_info *out) const = 0; + virtual escape_info find_escape_strings(dstate_id_t this_idx) const = 0; virtual size_t accelSize(void) const = 0; - virtual void buildAccel(dstate_id_t this_idx, void *accel_out) = 0; + virtual void 
buildAccel(dstate_id_t this_idx, const escape_info &info, + void *accel_out) = 0; }; class mcclellan_build_strat : public dfa_build_strat { @@ -81,14 +82,15 @@ class mcclellan_build_strat : public dfa_build_strat { explicit mcclellan_build_strat(raw_dfa &r) : rdfa(r) {} raw_dfa &get_raw() const override { return rdfa; } std::unique_ptr gatherReports( - std::vector &reports /* out */, - std::vector &reports_eod /* out */, - u8 *isSingleReport /* out */, - ReportID *arbReport /* out */) const override; - void find_escape_strings(dstate_id_t this_idx, - escape_info *out) const override; + std::vector &reports /* out */, + std::vector &reports_eod /* out */, + u8 *isSingleReport /* out */, + ReportID *arbReport /* out */) const override; + escape_info find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override; - void buildAccel(dstate_id_t this_idx, void *accel_out) override; + void buildAccel(dstate_id_t this_idx,const escape_info &info, + void *accel_out) override; + virtual u32 max_allowed_offset_accel() const; private: raw_dfa &rdfa; diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp new file mode 100644 index 000000000..12a05aaa0 --- /dev/null +++ b/src/nfa/mcclellancompile_accel.cpp @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
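The new mcclellancompile_accel.cpp that follows drives offset acceleration off "paths": per-step character reach sets collected by walking the DFA forward from a candidate accel state. As a minimal sketch of that forward walk, using std::bitset<256> and a plain transition table as stand-ins for the real CharReach and raw_dfa types (toy names, illustrative only, not the shipped code):

#include <array>
#include <bitset>
#include <cstdint>
#include <map>
#include <vector>

using Reach = std::bitset<256>; // stand-in for ue2::CharReach
using State = uint16_t;         // stand-in for dstate_id_t

struct ToyPath {
    std::vector<Reach> reach; // reach of each step taken so far
    State dest = 0;           // state reached after those steps
};

// One extension step: group the 256 input bytes by the state they lead
// to, then grow the path once per distinct successor. This mirrors the
// dest map built inside extend() in the file below.
static std::vector<ToyPath>
extendOnce(const ToyPath &p, const std::vector<std::array<State, 256>> &next) {
    std::map<State, Reach> dest;
    for (uint32_t c = 0; c < 256; c++) {
        dest[next[p.dest][c]].set(c);
    }
    std::vector<ToyPath> out;
    for (const auto &e : dest) {
        ToyPath pp = p; // copy the shared prefix
        pp.reach.push_back(e.second);
        pp.dest = e.first;
        out.push_back(pp);
    }
    return out;
}

The real extend() additionally cuts paths short at report states and prunes paths dominated by an existing path to the same destination; generate_paths() caps the population at PATHS_LIMIT (500) to bound compile time.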
+ */ + +#include "mcclellancompile_accel.h" + +#include "mcclellancompile_util.h" + +#include "grey.h" +#include "nfagraph/ng_limex_accel.h" +#include "util/charreach.h" +#include "util/container.h" +#include "util/dump_charclass.h" + +#include +#include + +#define PATHS_LIMIT 500 + +using namespace std; + +namespace ue2 { + +namespace { + +struct path { + vector reach; + dstate_id_t dest = DEAD_STATE; + explicit path(dstate_id_t base) : dest(base) {} +}; + +} + +static UNUSED +string describeClasses(const vector &v) { + std::ostringstream oss; + for (const auto &cr : v) { + describeClass(oss, cr); + } + return oss.str(); +} + +static +void dump_paths(const vector &paths) { + for (UNUSED const auto &p : paths) { + DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest); + } + DEBUG_PRINTF("%zu paths\n", paths.size()); +} + +static +bool is_useful_path(const vector &good, const path &p) { + for (const auto &g : good) { + assert(g.dest == p.dest); + assert(g.reach.size() <= p.reach.size()); + auto git = g.reach.rbegin(); + auto pit = p.reach.rbegin(); + + for (; git != g.reach.rend(); ++git, ++pit) { + if (!pit->isSubsetOf(*git)) { + goto next; + } + } + DEBUG_PRINTF("better: [%s] -> %u\n", + describeClasses(g.reach).c_str(), g.dest); + + return false; + next:; + } + + return true; +} + +static +path append(const path &orig, const CharReach &cr, u32 new_dest) { + path p(new_dest); + p.reach = orig.reach; + p.reach.push_back(cr); + + return p; +} + +static +void extend(const raw_dfa &rdfa, const path &p, + map > &all, + vector &out) { + dstate s = rdfa.states[p.dest]; + + if (!p.reach.empty() && p.reach.back().none()) { + out.push_back(p); + return; + } + + if (!s.reports.empty()) { + if (generates_callbacks(rdfa.kind)) { + out.push_back(p); + return; + } else { + path pp = append(p, CharReach(), p.dest); + all[p.dest].push_back(pp); + out.push_back(pp); + } + } + + if (!s.reports_eod.empty()) { + path pp = append(p, CharReach(), p.dest); + all[p.dest].push_back(pp); + out.push_back(pp); + } + + map dest; + for (unsigned i = 0; i < N_CHARS; i++) { + u32 succ = s.next[rdfa.alpha_remap[i]]; + dest[succ].set(i); + } + + for (const auto &e : dest) { + path pp = append(p, e.second, e.first); + if (!is_useful_path(all[e.first], pp)) { + DEBUG_PRINTF("not useful: [%s] -> %u\n", + describeClasses(pp.reach).c_str(), pp.dest); + continue; + } + + DEBUG_PRINTF("----good: [%s] -> %u\n", + describeClasses(pp.reach).c_str(), pp.dest); + all[e.first].push_back(pp); + out.push_back(pp); + } +} + +static +vector > generate_paths(const raw_dfa &rdfa, dstate_id_t base, + u32 len) { + vector paths{ path(base) }; + map > all; + all[base].push_back(path(base)); + for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) { + vector next_gen; + for (const auto &p : paths) { + extend(rdfa, p, all, next_gen); + } + + paths = move(next_gen); + } + + dump_paths(paths); + + vector > rv; + for (auto &p : paths) { + rv.push_back(move(p.reach)); + } + return rv; +} + +escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, + u32 max_allowed_accel_offset) { + DEBUG_PRINTF("looking for accel for %hu\n", base); + vector > paths = generate_paths(rdfa, base, + max_allowed_accel_offset + 1); + AccelScheme as = findBestAccelScheme(paths, CharReach()); + escape_info rv; + rv.outs2_broken = true; + rv.offset = as.offset; + rv.outs = as.cr; + DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); + return rv; +} + + +static +vector find_nonexit_symbols(const raw_dfa &rdfa, + const 
CharReach &escape) { + set rv; + CharReach nonexit = ~escape; + for (auto i = nonexit.find_first(); i != CharReach::npos; + i = nonexit.find_next(i)) { + rv.insert(rdfa.alpha_remap[i]); + } + + return vector(rv.begin(), rv.end()); +} + +static +set find_region(const raw_dfa &rdfa, dstate_id_t base, + const escape_info &ei) { + DEBUG_PRINTF("looking for region around %hu\n", base); + + set region = {base}; + + if (!ei.outs2_broken) { + return region; + } + + DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.outs).c_str(), ei.offset); + + const CharReach &escape = ei.outs; + auto nonexit_symbols = find_nonexit_symbols(rdfa, escape); + + vector pending = {base}; + while (!pending.empty()) { + dstate_id_t curr = pending.back(); + pending.pop_back(); + for (auto s : nonexit_symbols) { + dstate_id_t t = rdfa.states[curr].next[s]; + if (contains(region, t)) { + continue; + } + + DEBUG_PRINTF(" %hu is in region\n", t); + region.insert(t); + pending.push_back(t); + } + } + + return region; +} + +static +bool better(const escape_info &a, const escape_info &b) { + if (!a.outs2_broken && b.outs2_broken) { + return true; + } + + if (!b.outs2_broken) { + return false; + } + + return a.outs.count() < b.outs.count(); +} + +map populateAccelerationInfo(const raw_dfa &rdfa, + const dfa_build_strat &strat, + const Grey &grey) { + map rv; + if (!grey.accelerateDFA) { + return rv; + } + + dstate_id_t sds_proxy = get_sds_or_proxy(rdfa); + DEBUG_PRINTF("sds %hu\n", sds_proxy); + + for (size_t i = 0; i < rdfa.states.size(); i++) { + escape_info ei = strat.find_escape_strings(i); + + if (i == DEAD_STATE) { + continue; + } + + /* Note on report acceleration states: While we can't accelerate while we + * are spamming out callbacks, the QR code paths don't raise reports + * during scanning so they can accelerate report states. */ + if (generates_callbacks(rdfa.kind) + && !rdfa.states[i].reports.empty()) { + continue; + } + + size_t single_limit = i == sds_proxy ? 
ACCEL_DFA_MAX_FLOATING_STOP_CHAR + : ACCEL_DFA_MAX_STOP_CHAR; + DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); + + if (ei.outs.count() > single_limit) { + DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, + ei.outs.count()); + continue; + } + + DEBUG_PRINTF("state %zu should be accelerable %zu\n", + i, ei.outs.count()); + + rv[i] = ei; + } + + /* provide accleration states to states in the region of sds */ + if (contains(rv, sds_proxy)) { + auto sds_region = find_region(rdfa, sds_proxy, rv[sds_proxy]); + for (auto s : sds_region) { + if (!contains(rv, s) || better(rv[sds_proxy], rv[s])) { + rv[s] = rv[sds_proxy]; + } + } + } + + return rv; +} + +static +bool double_byte_ok(const escape_info &info) { + return !info.outs2_broken + && info.outs2_single.count() + info.outs2.size() <= 8 + && info.outs2_single.count() < info.outs2.size() + && info.outs2_single.count() <= 2 && !info.outs2.empty(); +} + +escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, + dstate_id_t this_idx, + u32 max_allowed_accel_offset) { + escape_info rv; + const dstate &raw = rdfa.states[this_idx]; + const auto &alpha_remap = rdfa.alpha_remap; + + flat_set> outs2_local; + for (unsigned i = 0; i < N_CHARS; i++) { + outs2_local.clear(); + + if (raw.next[alpha_remap[i]] != this_idx) { + rv.outs.set(i); + + DEBUG_PRINTF("next is %hu\n", raw.next[alpha_remap[i]]); + const dstate &raw_next = rdfa.states[raw.next[alpha_remap[i]]]; + + if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { + DEBUG_PRINTF("leads to report\n"); + rv.outs2_broken = true; /* cannot accelerate over reports */ + } + + for (unsigned j = 0; !rv.outs2_broken && j < N_CHARS; j++) { + if (raw_next.next[alpha_remap[j]] == raw.next[alpha_remap[j]]) { + continue; + } + + DEBUG_PRINTF("adding %02x %02x -> %hu to 2 \n", i, j, + raw_next.next[alpha_remap[j]]); + outs2_local.emplace((u8)i, (u8)j); + } + + if (outs2_local.size() > 8) { + DEBUG_PRINTF("adding %02x to outs2_single\n", i); + rv.outs2_single.set(i); + } else { + insert(&rv.outs2, outs2_local); + } + if (rv.outs2.size() > 8) { + DEBUG_PRINTF("outs2 too big\n"); + rv.outs2_broken = true; + } + } + } + + DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); + DEBUG_PRINTF("broken %d\n", rv.outs2_broken); + if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) + && this_idx == rdfa.start_floating + && this_idx != DEAD_STATE) { + DEBUG_PRINTF("looking for offset accel at %u\n", this_idx); + auto offset = look_for_offset_accel(rdfa, this_idx, + max_allowed_accel_offset); + DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(), + rv.outs.count()); + if (offset.outs.count() < rv.outs.count()) { + DEBUG_PRINTF("using offset accel\n"); + rv = offset; + } + } + + return rv; +} + +} diff --git a/src/nfa/mcclellancompile_accel.h b/src/nfa/mcclellancompile_accel.h new file mode 100644 index 000000000..1e14c2cd1 --- /dev/null +++ b/src/nfa/mcclellancompile_accel.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
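find_region() in the compile code above decides how far the sds accel scheme may be shared: it flood fills from the sds proxy along every transition on a non-escape symbol, since scanning may legitimately halt in any state of that closure. The same flood fill in miniature (toy transition table and hypothetical names; not the shipped code):

#include <array>
#include <cstdint>
#include <set>
#include <vector>

using State = uint16_t;

// Collect every state reachable from base without consuming an escape
// byte; acceleration started at base may stop in any of them.
static std::set<State>
toyFindRegion(const std::vector<std::array<State, 256>> &next,
              const std::vector<uint8_t> &nonexit_bytes, State base) {
    std::set<State> region = {base};
    std::vector<State> pending = {base};
    while (!pending.empty()) {
        State curr = pending.back();
        pending.pop_back();
        for (uint8_t b : nonexit_bytes) {
            State t = next[curr][b];
            if (region.insert(t).second) { // first time we have seen t
                pending.push_back(t);
            }
        }
    }
    return region;
}

populateAccelerationInfo() then copies the sds scheme over any state in the region whose own scheme is no better, as judged by better().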
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCCLELLANCOMPILE_ACCEL_H +#define MCCLELLANCOMPILE_ACCEL_H + +#include "mcclellancompile.h" + +#include + +namespace ue2 { + +struct Grey; + +#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 + +/** Maximum tolerated number of escape character from an accel state. + * This is larger than nfa, as we don't have a budget and the nfa cheats on stop + * characters for sets of states */ +#define ACCEL_DFA_MAX_STOP_CHAR 160 + +/** Maximum tolerated number of escape character from a sds accel state. Larger + * than normal states as accelerating sds is important. Matches NFA value */ +#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 + +escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, + u32 max_allowed_accel_offset); + +std::map populateAccelerationInfo(const raw_dfa &rdfa, + const dfa_build_strat &strat, + const Grey &grey); + +escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, + dstate_id_t this_idx, + u32 max_allowed_accel_offset); + +} + +#endif diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index cd85ef367..2c9465208 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -334,4 +334,63 @@ size_t hash_dfa(const raw_dfa &rdfa) { return v; } +static +bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { + u16 top_remap = raw.alpha_remap[TOP]; + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + if (i != top_remap && raw.states[s].next[i] == s) { + return true; + } + } + return false; +} + +dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { + if (raw.start_floating != DEAD_STATE) { + DEBUG_PRINTF("has floating start\n"); + return raw.start_floating; + } + + DEBUG_PRINTF("looking for SDS proxy\n"); + + dstate_id_t s = raw.start_anchored; + + if (has_self_loop(s, raw)) { + return s; + } + + u16 top_remap = raw.alpha_remap[TOP]; + + ue2::unordered_set seen; + while (true) { + seen.insert(s); + DEBUG_PRINTF("basis %hu\n", s); + + /* check if we are connected to a state with a self loop */ + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + dstate_id_t t = raw.states[s].next[i]; + if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) { + return t; + } + } + + /* find a neighbour to use as a basis for looking for the sds proxy */ + dstate_id_t t 
= DEAD_STATE; + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + dstate_id_t tt = raw.states[s].next[i]; + if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) { + t = tt; + break; + } + } + + if (t == DEAD_STATE) { + /* we were unable to find a state to use as a SDS proxy */ + return DEAD_STATE; + } + + s = t; + } +} + } // namespace ue2 diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index 183abcaaa..7015893b5 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,14 +29,13 @@ #ifndef MCCLELLAN_COMPILE_UTIL_H #define MCCLELLAN_COMPILE_UTIL_H +#include "rdfa.h" #include "ue2common.h" #include namespace ue2 { -struct raw_dfa; - u32 remove_leading_dots(raw_dfa &raw); void prune_overlong(raw_dfa &raw, u32 max_offset); std::set all_reports(const raw_dfa &rdfa); @@ -50,6 +49,8 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa); /** \brief Compute a simple hash of this raw_dfa, including its reports. */ size_t hash_dfa(const raw_dfa &rdfa); +dstate_id_t get_sds_or_proxy(const raw_dfa &raw); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index ed9f5bfe9..41eda35da 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -464,16 +464,13 @@ void dumpPaths(const vector > &paths) { #endif static -void blowoutPathsLessStrictSegment(vector > *paths) { +void blowoutPathsLessStrictSegment(vector > &paths) { /* paths segments which are a superset of an earlier segment should never be * picked as an acceleration segment -> to improve processing just replace * with dot */ - for (vector >::iterator p = paths->begin(); - p != paths->end(); ++p) { - for (vector::iterator it = p->begin(); it != p->end(); - ++it) { - vector::iterator jt = it; - for (++jt; jt != p->end(); ++jt) { + for (auto &p : paths) { + for (auto it = p.begin(); it != p.end(); ++it) { + for (auto jt = next(it); jt != p.end(); ++jt) { if (it->isSubsetOf(*jt)) { *jt = CharReach::dot(); } @@ -483,10 +480,10 @@ void blowoutPathsLessStrictSegment(vector > *paths) { } static -void unifyPathsLastSegment(vector > *paths) { +void unifyPathsLastSegment(vector > &paths) { /* try to unify paths which only differ in the last segment */ - for (vector >::iterator p = paths->begin(); - p != paths->end() && p + 1 != paths->end();) { + for (vector >::iterator p = paths.begin(); + p != paths.end() && p + 1 != paths.end();) { vector &a = *p; vector &b = *(p + 1); @@ -504,7 +501,7 @@ void unifyPathsLastSegment(vector > *paths) { if (i == a.size() - 1) { /* we can unify these paths */ a[i] |= b[i]; - paths->erase(p + 1); + paths.erase(p + 1); } else { ++p; } @@ -512,23 +509,59 @@ void unifyPathsLastSegment(vector > *paths) { } static -void improvePaths(vector > *paths) { +void improvePaths(vector > &paths) { #ifdef DEBUG DEBUG_PRINTF("orig paths\n"); - dumpPaths(*paths); + dumpPaths(paths); #endif blowoutPathsLessStrictSegment(paths); - sort(paths->begin(), paths->end()); + 
sort(paths.begin(), paths.end()); unifyPathsLastSegment(paths); #ifdef DEBUG DEBUG_PRINTF("opt paths\n"); - dumpPaths(*paths); + dumpPaths(paths); #endif } +AccelScheme findBestAccelScheme(vector > paths, + const CharReach &terminating) { + improvePaths(paths); + + DEBUG_PRINTF("we have %zu paths\n", paths.size()); + if (paths.size() > 40) { + return AccelScheme(); /* too many paths to explore */ + } + + /* if we were smart we would do something netflowy on the paths to find the + * best cut. But we aren't, so we will just brute force it. + */ + AccelScheme curr(terminating, 0U); + AccelScheme best; + findBest(paths.begin(), paths.end(), curr, &best); + + /* find best is a bit lazy in terms of minimising the offset, see if we can + * make it better. need to find the min max offset that we need.*/ + u32 offset = 0; + for (vector >::iterator p = paths.begin(); + p != paths.end(); ++p) { + u32 i = 0; + for (vector::iterator it = p->begin(); it != p->end(); + ++it, i++) { + if (it->isSubsetOf(best.cr)) { + break; + } + } + offset = MAX(offset, i); + } + assert(offset <= best.offset); + best.offset = offset; + + return best; +} + AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, const vector &refined_cr, const map &br_cyclic, @@ -579,36 +612,7 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, reverse(it->begin(), it->end()); } - improvePaths(&paths); - DEBUG_PRINTF("we have %zu paths\n", paths.size()); - if (paths.size() > 40) { - return AccelScheme(); /* too many paths to explore */ - } - - /* if we were smart we would do something netflowy on the paths to find the - * best cut. But we aren't, so we will just brute force it. - */ - AccelScheme curr(terminating, 0U); - AccelScheme best; - findBest(paths.begin(), paths.end(), curr, &best); - - /* find best is a bit lazy in terms of minimising the offset, see if we can - * make it better. need to find the min max offset that we need.*/ - u32 offset = 0; - for (vector >::iterator p = paths.begin(); - p != paths.end(); ++p) { - u32 i = 0; - for (vector::iterator it = p->begin(); it != p->end(); - ++it, i++) { - if (it->isSubsetOf(best.cr)) { - break; - } - } - offset = MAX(offset, i); - } - assert(offset <= best.offset); - best.offset = offset; - return best; + return findBestAccelScheme(std::move(paths), terminating); } NFAVertex get_sds_or_proxy(const NGHolder &g) { diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index b9dba2e1e..113b216c2 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -110,6 +110,9 @@ AccelScheme nfaFindAccel(const NGHolder &g, const std::vector &verts, const std::map &br_cyclic, bool allow_wide); +AccelScheme findBestAccelScheme(std::vector > paths, + const CharReach &terminating); + /** \brief Check if vertex \a v is an accelerable state (for a limex NFA). 
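findBestAccelScheme() above finishes by tightening the offset: findBest() is lazy about minimising it, so the required offset is recomputed as the maximum, over all paths, of the index of the first segment already covered by the chosen stop set. That loop in isolation (bitset stand-in for CharReach; illustrative only):

#include <algorithm>
#include <bitset>
#include <cstdint>
#include <vector>

using Reach = std::bitset<256>;

// Smallest offset that still works for the chosen stop set 'best': scan
// each path for the first segment whose reach is a subset of best; the
// answer is the worst case over all paths.
static uint32_t minimiseOffset(const std::vector<std::vector<Reach>> &paths,
                               const Reach &best) {
    uint32_t offset = 0;
    for (const auto &p : paths) {
        uint32_t i = 0;
        for (const auto &seg : p) {
            if ((seg & ~best).none()) { // seg is a subset of best
                break;
            }
            i++;
        }
        offset = std::max(offset, i);
    }
    return offset;
}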
*/ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::vector &refined_cr, diff --git a/src/util/dump_charclass.h b/src/util/dump_charclass.h index d2a718804..9c3362bc6 100644 --- a/src/util/dump_charclass.h +++ b/src/util/dump_charclass.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,8 +48,8 @@ enum cc_output_t { class CharReach; -void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength, - enum cc_output_t out_type); +void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength = 16, + enum cc_output_t out_type = CC_OUT_TEXT); std::string describeClass(const CharReach &cr, size_t maxLength = 16, enum cc_output_t out_type = CC_OUT_TEXT); From 6898dc9864b932b92c2218ee5c7e7b6122fc887a Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 4 Mar 2016 14:20:53 +1100 Subject: [PATCH 136/218] look for normal accel schemes using compressed alpha --- src/nfa/mcclellancompile_accel.cpp | 82 +++++++++++++++++++----------- src/nfa/mcclellancompile_accel.h | 3 -- 2 files changed, 53 insertions(+), 32 deletions(-) diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp index 12a05aaa0..67f4a39ee 100644 --- a/src/nfa/mcclellancompile_accel.cpp +++ b/src/nfa/mcclellancompile_accel.cpp @@ -177,6 +177,7 @@ vector > generate_paths(const raw_dfa &rdfa, dstate_id_t base, return rv; } +static escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, u32 max_allowed_accel_offset) { DEBUG_PRINTF("looking for accel for %hu\n", base); @@ -191,7 +192,6 @@ escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, return rv; } - static vector find_nonexit_symbols(const raw_dfa &rdfa, const CharReach &escape) { @@ -253,6 +253,17 @@ bool better(const escape_info &a, const escape_info &b) { return a.outs.count() < b.outs.count(); } +static +vector reverse_alpha_remapping(const raw_dfa &rdfa) { + vector rv(rdfa.alpha_size - 1); /* TOP not required */ + + for (u32 i = 0; i < N_CHARS; i++) { + rv.at(rdfa.alpha_remap[i]).set(i); + } + + return rv; +} + map populateAccelerationInfo(const raw_dfa &rdfa, const dfa_build_strat &strat, const Grey &grey) { @@ -321,44 +332,57 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, u32 max_allowed_accel_offset) { escape_info rv; const dstate &raw = rdfa.states[this_idx]; - const auto &alpha_remap = rdfa.alpha_remap; + const vector rev_map = reverse_alpha_remapping(rdfa); - flat_set> outs2_local; - for (unsigned i = 0; i < N_CHARS; i++) { - outs2_local.clear(); + for (u32 i = 0; i < rev_map.size(); i++) { + if (raw.next[i] == this_idx) { + continue; + } - if (raw.next[alpha_remap[i]] != this_idx) { - rv.outs.set(i); + const CharReach &cr_i = rev_map.at(i); - DEBUG_PRINTF("next is %hu\n", raw.next[alpha_remap[i]]); - const dstate &raw_next = rdfa.states[raw.next[alpha_remap[i]]]; + rv.outs |= cr_i; - if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { - DEBUG_PRINTF("leads to report\n"); - rv.outs2_broken = true; /* cannot accelerate over reports */ - } + DEBUG_PRINTF("next is %hu\n", raw.next[i]); + const dstate &raw_next = rdfa.states[raw.next[i]]; - for (unsigned j = 0; !rv.outs2_broken && j < N_CHARS; j++) { - if (raw_next.next[alpha_remap[j]] == raw.next[alpha_remap[j]]) { - continue; - } + if (!raw_next.reports.empty() && 
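reverse_alpha_remapping() above inverts rdfa.alpha_remap: rather than mapping each of the 256 bytes to its symbol class, it builds one reach set per class so the escape analysis can walk the compressed alphabet (alpha_size - 1 classes, with TOP excluded). A sketch of the inversion with toy types (assumes no byte remaps to TOP):

#include <array>
#include <bitset>
#include <cstdint>
#include <vector>

using Reach = std::bitset<256>;

// Invert a byte -> symbol-class map into a reach set per class.
static std::vector<Reach>
reverseAlphaRemap(const std::array<uint16_t, 256> &remap, uint16_t n_classes) {
    std::vector<Reach> rv(n_classes); // n_classes excludes the TOP symbol
    for (uint32_t i = 0; i < 256; i++) {
        rv.at(remap[i]).set(i); // throws if a byte maps outside the classes
    }
    return rv;
}

Iterating classes instead of raw bytes lets one test cover every byte with identical transitions, which is what allows the rewritten escape analysis below to price the pair budget as cr_i.count() * cr_all_j.count().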
generates_callbacks(rdfa.kind)) { + DEBUG_PRINTF("leads to report\n"); + rv.outs2_broken = true; /* cannot accelerate over reports */ + } - DEBUG_PRINTF("adding %02x %02x -> %hu to 2 \n", i, j, - raw_next.next[alpha_remap[j]]); - outs2_local.emplace((u8)i, (u8)j); - } + if (rv.outs2_broken) { + continue; + } - if (outs2_local.size() > 8) { - DEBUG_PRINTF("adding %02x to outs2_single\n", i); - rv.outs2_single.set(i); - } else { - insert(&rv.outs2, outs2_local); + CharReach cr_all_j; + for (u32 j = 0; j < rev_map.size(); j++) { + if (raw_next.next[j] == raw.next[j]) { + continue; } - if (rv.outs2.size() > 8) { - DEBUG_PRINTF("outs2 too big\n"); - rv.outs2_broken = true; + + DEBUG_PRINTF("adding sym %u sym %u -> %hu to 2 \n", i, j, + raw_next.next[j]); + cr_all_j |= rev_map.at(j); + } + + if (cr_i.count() * cr_all_j.count() > 8) { + DEBUG_PRINTF("adding sym %u to outs2_single\n", i); + rv.outs2_single |= cr_i; + } else { + for (auto ii = cr_i.find_first(); ii != CharReach::npos; + ii = cr_i.find_next(ii)) { + for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; + jj = cr_all_j.find_next(jj)) { + rv.outs2.emplace((u8)ii, (u8)jj); + } } } + + if (rv.outs2.size() > 8) { + DEBUG_PRINTF("outs2 too big\n"); + rv.outs2_broken = true; + } } DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); diff --git a/src/nfa/mcclellancompile_accel.h b/src/nfa/mcclellancompile_accel.h index 1e14c2cd1..aa1672b00 100644 --- a/src/nfa/mcclellancompile_accel.h +++ b/src/nfa/mcclellancompile_accel.h @@ -48,9 +48,6 @@ struct Grey; * than normal states as accelerating sds is important. Matches NFA value */ #define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 -escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, - u32 max_allowed_accel_offset); - std::map populateAccelerationInfo(const raw_dfa &rdfa, const dfa_build_strat &strat, const Grey &grey); From 89d7728f77bc7a5ee985f1137a5978d384ead9d0 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 10 Mar 2016 09:58:28 +1100 Subject: [PATCH 137/218] refactoring of double byte offset accel to use paths and add to mcclellan --- src/nfa/accelcompile.cpp | 3 +- src/nfa/accelcompile.h | 2 + src/nfa/limex_compile.cpp | 80 +++---- src/nfa/mcclellancompile.cpp | 6 +- src/nfa/mcclellancompile.h | 1 + src/nfa/mcclellancompile_accel.cpp | 24 +- src/nfagraph/ng_limex_accel.cpp | 366 +++++++++++++---------------- src/nfagraph/ng_limex_accel.h | 54 ++++- 8 files changed, 273 insertions(+), 263 deletions(-) diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 5739618a3..6f3b6e8a9 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -94,7 +94,6 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs); } -static bool isCaselessDouble(const flat_set> &stop) { // test for vector containing if (stop.size() != 4) { diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h index 7bf7fe5dc..d479a5457 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -56,6 +56,8 @@ struct MultibyteAccelInfo { multiaccel_type type = MAT_NONE; }; +bool isCaselessDouble(const flat_set> &stop); + struct AccelInfo { AccelInfo() : single_offset(0U), double_offset(0U), 
single_stops(CharReach::dot()), diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 7fa01d8a9..5d51feb9b 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -566,12 +566,29 @@ bool containsBadSubset(const limex_accel_info &accel, } static -void doAccelCommon(NGHolder &g, - ue2::unordered_map &accel_map, - const ue2::unordered_map &state_ids, - const map &br_cyclic, - const u32 num_states, limex_accel_info *accel, - const CompileContext &cc) { +bool is_too_wide(const AccelScheme &as) { + return as.cr.count() > MAX_MERGED_ACCEL_STOPS; +} + +static +void fillAccelInfo(build_info &bi) { + if (!bi.do_accel) { + return; + } + + NGHolder &g = bi.h; + limex_accel_info &accel = bi.accel; + unordered_map &accel_map = accel.accel_map; + const map &br_cyclic = bi.br_cyclic; + const CompileContext &cc = bi.cc; + const unordered_map &state_ids = bi.state_ids; + const u32 num_states = bi.num_states; + + nfaFindAccelSchemes(g, br_cyclic, &accel_map); + filterAccelStates(g, bi.tops, &accel_map); + + assert(accel_map.size() <= NFA_MAX_ACCEL_STATES); + vector refined_cr = reduced_cr(g, br_cyclic); vector astates; @@ -602,7 +619,7 @@ void doAccelCommon(NGHolder &g, } } - if (containsBadSubset(*accel, state_set, effective_sds)) { + if (containsBadSubset(accel, state_set, effective_sds)) { DEBUG_PRINTF("accel %u has bad subset\n", i); continue; /* if a subset failed to build we would too */ } @@ -610,19 +627,20 @@ void doAccelCommon(NGHolder &g, const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy); AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic, - allow_wide); - if (as.cr.count() > MAX_MERGED_ACCEL_STOPS) { + allow_wide, true); + if (is_too_wide(as)) { DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i, as.cr.count(), MAX_MERGED_ACCEL_STOPS); continue; } - DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset); + DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset, + as.double_offset); // try multibyte acceleration first MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc); - precalcAccel &pa = accel->precalc[state_set]; + precalcAccel &pa = accel.precalc[state_set]; useful |= state_set; // if we successfully built a multibyte accel scheme, use that @@ -635,17 +653,11 @@ void doAccelCommon(NGHolder &g, pa.single_offset = as.offset; pa.single_cr = as.cr; - - if (states.size() == 1) { - DoubleAccelInfo b = findBestDoubleAccelInfo(g, states.front()); - if (pa.single_cr.count() > b.stop1.count()) { - /* insert this information into the precalc accel info as it is - * better than the single scheme */ - pa.double_offset = b.offset; - pa.double_lits = b.stop2; - pa.double_cr = b.stop1; - } - } + if (as.double_byte.size() != 0) { + pa.double_offset = as.double_offset; + pa.double_lits = as.double_byte; + pa.double_cr = as.double_cr; + }; } for (const auto &m : accel_map) { @@ -663,31 +675,19 @@ void doAccelCommon(NGHolder &g, state_set.set(state_id); bool is_multi = false; - auto p_it = accel->precalc.find(state_set); - if (p_it != accel->precalc.end()) { + auto p_it = accel.precalc.find(state_set); + if (p_it != accel.precalc.end()) { const precalcAccel &pa = p_it->second; offset = max(pa.double_offset, pa.single_offset); is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE; assert(offset <= MAX_ACCEL_DEPTH); } - accel->accelerable.insert(v); - if (!is_multi) - findAccelFriends(g, v, br_cyclic, offset, &accel->friends[v]); - } -} - -static -void fillAccelInfo(build_info &bi) { - if (!bi.do_accel) { - return; + 
accel.accelerable.insert(v); + if (!is_multi) { + findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); + } } - - nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map); - filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map); - assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES); - doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic, - bi.num_states, &bi.accel, bi.cc); } /** The AccelAux structure has large alignment specified, and this makes some diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 9b21b8c4a..279f454e7 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -201,7 +201,8 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, void *accel_out) { AccelAux *accel = (AccelAux *)accel_out; - DEBUG_PRINTF("accelerations scheme has offset %u\n", info.offset); + DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, + info.outs2_offset); accel->generic.offset = verify_u8(info.offset); if (double_byte_ok(info) && info.outs2_single.none() @@ -209,6 +210,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, accel->accel_type = ACCEL_DVERM; accel->dverm.c1 = info.outs2.begin()->first; accel->dverm.c2 = info.outs2.begin()->second; + accel->dverm.offset = verify_u8(info.outs2_offset); DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); return; } @@ -233,6 +235,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, accel->accel_type = ACCEL_DVERM_NOCASE; accel->dverm.c1 = firstC; accel->dverm.c2 = secondC; + accel->dverm.offset = verify_u8(info.outs2_offset); DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); return; } @@ -240,6 +243,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, if (double_byte_ok(info)) { accel->accel_type = ACCEL_DSHUFTI; + accel->dshufti.offset = verify_u8(info.outs2_offset); shuftiBuildDoubleMasks(info.outs2_single, info.outs2, &accel->dshufti.lo1, &accel->dshufti.hi1, diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index d4b4325dc..8dcc161bc 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -60,6 +60,7 @@ struct escape_info { flat_set> outs2; bool outs2_broken = false; u32 offset = 0; + u32 outs2_offset = 0; }; class dfa_build_strat { diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp index 67f4a39ee..471d0d53b 100644 --- a/src/nfa/mcclellancompile_accel.cpp +++ b/src/nfa/mcclellancompile_accel.cpp @@ -183,11 +183,18 @@ escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, DEBUG_PRINTF("looking for accel for %hu\n", base); vector > paths = generate_paths(rdfa, base, max_allowed_accel_offset + 1); - AccelScheme as = findBestAccelScheme(paths, CharReach()); + AccelScheme as = findBestAccelScheme(paths, CharReach(), true); escape_info rv; - rv.outs2_broken = true; rv.offset = as.offset; rv.outs = as.cr; + if (!as.double_byte.empty()) { + rv.outs2_single = as.double_cr; + rv.outs2 = as.double_byte; + rv.outs2_offset = as.double_offset; + rv.outs2_broken = false; + } else { + rv.outs2_broken = true; + } DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); return rv; } @@ -308,10 +315,15 @@ map populateAccelerationInfo(const raw_dfa &rdfa, /* provide accleration states to states in the region of sds */ if (contains(rv, sds_proxy)) { - auto sds_region = find_region(rdfa, sds_proxy, rv[sds_proxy]); + escape_info sds_ei = rv[sds_proxy]; + sds_ei.outs2_broken 
= true; /* region based on single byte scheme + * may differ from double byte */ + DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n", + sds_ei.outs.count()); + auto sds_region = find_region(rdfa, sds_proxy, sds_ei); for (auto s : sds_region) { - if (!contains(rv, s) || better(rv[sds_proxy], rv[s])) { - rv[s] = rv[sds_proxy]; + if (!contains(rv, s) || better(sds_ei, rv[s])) { + rv[s] = sds_ei; } } } @@ -395,7 +407,7 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, max_allowed_accel_offset); DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(), rv.outs.count()); - if (offset.outs.count() < rv.outs.count()) { + if (double_byte_ok(offset) || offset.outs.count() < rv.outs.count()) { DEBUG_PRINTF("using offset accel\n"); rv = offset; } diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 41eda35da..63ec546ba 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -132,199 +132,6 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, } } -static -void buildTwoByteStops(flat_set> &twobyte, const CharReach &cr1, - const CharReach &cr2) { - for (size_t c1 = cr1.find_first(); c1 != cr1.npos; c1 = cr1.find_next(c1)) { - for (size_t c2 = cr2.find_first(); c2 != cr2.npos; - c2 = cr2.find_next(c2)) { - twobyte.emplace((u8)c1, (u8)c2); - } - } -} - -static -void findStopLiteralsAtVertex(NFAVertex v, const NGHolder &g, - DoubleAccelInfo &build) { - DEBUG_PRINTF("state %u\n", g[v].index); - - // double-byte accel is possible: calculate all single- and double-byte - // accel literals. - const CharReach &cr1 = g[v].char_reach; - - if (edge(v, g.accept, g).second) { - // If this first byte is an accept state, it must contribute a - // single-byte escape. We can still go on and calculate additional - // double-byte ones, though. 
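The ad hoc double-byte search being deleted here and the path-based make_double_accel() that replaces it later in this patch share one core constraint: a pair of reach sets only yields a usable double-byte stop set if the cross product of the two reaches fits the 8-pair budget of the double shufti/vermicelli engines. That expansion in isolation (bitset stand-ins, hypothetical helper name):

#include <bitset>
#include <cstdint>
#include <set>
#include <utility>

using Reach = std::bitset<256>;

// Expand two reach sets into explicit two-byte stop literals; refuse if
// the cross product blows the 8-pair budget of double accel schemes.
static bool expandTwoByteStops(const Reach &cr1, const Reach &cr2,
                               std::set<std::pair<uint8_t, uint8_t>> &out) {
    if (cr1.count() * cr2.count() > 8) {
        return false; // caller falls back to a single-byte scheme
    }
    for (uint32_t a = 0; a < 256; a++) {
        if (!cr1.test(a)) {
            continue;
        }
        for (uint32_t b = 0; b < 256; b++) {
            if (cr2.test(b)) {
                out.emplace(static_cast<uint8_t>(a), static_cast<uint8_t>(b));
            }
        }
    }
    return true;
}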
- /* TODO: fix for rose */ - build.stop1 |= cr1; - } - - flat_set> twobyte; // for just this starting state - bool single = false; - - for (auto w : adjacent_vertices_range(v, g)) { - if (w == g.accept || w == g.acceptEod) { - continue; - } - const CharReach &cr2 = g[w].char_reach; - size_t count = cr1.count() * cr2.count() + build.stop2.size(); - if (count > 0 && count <= 8) { // can't do more than 8 two-byte - buildTwoByteStops(twobyte, cr1, cr2); - } else { - // two many two-byte literals, add the first byte as single - single = true; - break; - } - } - - if (single || twobyte.empty()) { - assert(!cr1.none()); - build.stop1 |= cr1; - } else { - assert(!twobyte.empty()); - build.stop2.insert(twobyte.begin(), twobyte.end()); - } -} - -static -bool is_bit5_insensitive(const flat_set> &stop) { - if (stop.size() != 4) { - return false; - } - - const u8 a = stop.begin()->first & CASE_CLEAR; - const u8 b = stop.begin()->second & CASE_CLEAR; - - for (flat_set>::const_iterator it = stop.begin(); - it != stop.end(); ++it) { - if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) { - return false; - } - } - - return true; -} - -static -bool is_dverm(const DoubleAccelInfo &a) { - if (a.stop1.any()) { - return false; - } - - if (a.stop2.size() == 1) { - return true; - } - - return is_bit5_insensitive(a.stop2); -} - -static -bool is_double_better(const DoubleAccelInfo &a, const DoubleAccelInfo &b) { - /* Note: this is not an operator< */ - - if (a.stop2.empty()) { - return false; - } - - if (b.stop2.empty()) { - return true; - } - - if (a.stop1.count() > b.stop1.count()) { - return false; - } - - if (a.stop1.count() < b.stop1.count()) { - return true; - } - - bool a_dvm = is_dverm(a); - bool b_dvm = is_dverm(b); - - if (b_dvm && !a_dvm) { - return false; - } - - if (!b_dvm && a_dvm) { - return true; - } - - if (a.stop2.size() > b.stop2.size()) { - return false; - } - - if (a.stop2.size() < b.stop2.size()) { - return true; - } - - return a.offset < b.offset; -} - -/** \brief Find the escape literals for a two byte accel at the given accel - * offset */ -static -void findDoubleAccel(const NGHolder &g, NFAVertex v, u32 accel_offset, - DoubleAccelInfo &build) { - DEBUG_PRINTF("find double accel +%u for vertex %u\n", accel_offset, - g[v].index); - build.offset = accel_offset; - - // Our accel state contributes single-byte escapes - build.stop1 |= ~g[v].char_reach; - - flat_set searchStates; // states that contribute stop literals - searchStates.insert(v); /* TODO: verify */ - - /* Note: We cannot search past an accepting state */ - /* TODO: remove restriction for non-callback generating */ - flat_set nextStates; - - insert(&nextStates, adjacent_vertices(v, g)); - nextStates.erase(v); - nextStates.erase(g.accept); - nextStates.erase(g.acceptEod); - - searchStates.swap(nextStates); - nextStates.clear(); - - // subsequent iterations are simpler, just follow all edges - for (u32 j = 1; j <= accel_offset; j++) { - for (auto u : searchStates) { - insert(&nextStates, adjacent_vertices(u, g)); - if (edge(u, g.accept, g).second) { - nextStates.clear(); - break; - } - nextStates.erase(g.accept); - nextStates.erase(g.acceptEod); - } - - searchStates.swap(nextStates); - nextStates.clear(); - } - - vector sorted; - insert(&sorted, sorted.end(), searchStates); - sort(sorted.begin(), sorted.end(), make_index_ordering(g)); - for (auto sv : sorted) { - findStopLiteralsAtVertex(sv, g, build); - } -} - -DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v) { - DoubleAccelInfo rv; - for (u32 
offset = 0; offset <= MAX_ACCEL_DEPTH; offset++) { - DoubleAccelInfo b_temp; - findDoubleAccel(g, v, offset, b_temp); - if (is_double_better(b_temp, rv)) { - rv = b_temp; - } - } - - return rv; -} - static void findPaths(const NGHolder &g, NFAVertex v, const vector &refined_cr, @@ -384,8 +191,13 @@ void findPaths(const NGHolder &g, NFAVertex v, } static -AccelScheme merge(const AccelScheme &a, const AccelScheme &b) { - return AccelScheme(a.cr | b.cr, MAX(a.offset, b.offset)); +AccelScheme merge(AccelScheme a, const AccelScheme &b) { + a.cr |= b.cr; + ENSURE_AT_LEAST(&a.offset, b.offset); + a.double_cr |= b.double_cr; + insert(&a.double_byte, b.double_byte); + ENSURE_AT_LEAST(&a.double_offset, b.double_offset); + return a; } static @@ -445,8 +257,106 @@ void findBest(vector >::const_iterator pb, } } -#ifdef DEBUG +static +AccelScheme make_double_accel(AccelScheme as, CharReach cr_1, + const CharReach &cr_2_in, u32 offset_in) { + cr_1 &= ~as.double_cr; + CharReach cr_2 = cr_2_in & ~as.double_cr; + u32 offset = offset_in; + + if (cr_1.none()) { + DEBUG_PRINTF("empty first element\n"); + as.double_offset = offset; + return as; + } + + if (cr_2_in != cr_2 || cr_2.none()) { + offset = offset_in + 1; + } + + size_t two_count = cr_1.count() * cr_2.count(); + + DEBUG_PRINTF("will generate raw %zu pairs\n", two_count); + + if (!two_count) { + DEBUG_PRINTF("empty element\n"); + as.double_offset = offset; + return as; + } + + if (two_count > 8) { + if (cr_2.count() < cr_1.count()) { + as.double_cr |= cr_2; + offset = offset_in + 1; + } else { + as.double_cr |= cr_1; + } + } else { + for (auto i = cr_1.find_first(); i != CharReach::npos; + i = cr_1.find_next(i)) { + for (auto j = cr_2.find_first(); j != CharReach::npos; + j = cr_2.find_next(j)) { + as.double_byte.insert(make_pair(i, j)); + } + } + } + + as.double_offset = offset; + DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n", + as.double_byte.size(), as.double_cr.count(), as.offset); + return as; +} +static +void findDoubleBest(vector >::const_iterator pb, + vector >::const_iterator pe, + const AccelScheme &curr, AccelScheme *best) { + assert(curr.offset <= MAX_ACCEL_DEPTH); + DEBUG_PRINTF("paths left %zu\n", pe - pb); + if (pb == pe) { + *best = curr; + return; + } + + DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); + + vector priority_path; + u32 i = 0; + for (vector::const_iterator p = pb->begin(); + p != pb->end() && next(p) != pb->end(); + ++p, i++) { + priority_path.push_back(make_double_accel(curr, *p, *next(p), i)); + } + + sort(priority_path.begin(), priority_path.end()); + + DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n", + best->double_byte.size(), best->double_cr.count(), + best->offset); + + for (vector::const_iterator it = priority_path.begin(); + it != priority_path.end(); ++it) { + + AccelScheme in = merge(curr, *it); + DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", + in.double_byte.size(), in.double_cr.count(), in.offset); + + if (in > *best) { + DEBUG_PRINTF("worse\n"); + continue; + } + AccelScheme temp = *best; + findDoubleBest(pb + 1, pe, in, &temp); + if (temp < *best) { + *best = temp; + DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n", + best->double_byte.size(), best->double_cr.count(), + best->offset); + } + } +} + +#ifdef DEBUG static void dumpPaths(const vector > &paths) { for (vector >::const_iterator p = paths.begin(); @@ -526,13 +436,56 @@ void improvePaths(vector > &paths) { #endif } +#define MAX_DOUBLE_ACCEL_PATHS 10 + +static +AccelScheme 
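make_double_accel() above spends that budget greedily on a pair of adjacent path segments: bytes already covered as single-byte stops are stripped first, and if the remaining cross product still exceeds eight pairs, the smaller of the two reaches is folded into the single-byte stop set instead of emitting pairs. The shape of that decision (toy types; a sketch of the policy, not the shipped routine):

#include <bitset>
#include <cstddef>

using Reach = std::bitset<256>;

struct ToyDoublePlan {
    Reach singles;    // bytes to stop on as plain single-byte escapes
    size_t pairs = 0; // explicit two-byte stops we would enumerate
};

// Decide between enumerating (cr1, cr2) pairs and widening the
// single-byte stop set, honouring the 8-pair budget.
static ToyDoublePlan planDoubleAccel(Reach cr1, Reach cr2,
                                     const Reach &already_covered) {
    cr1 &= ~already_covered; // these bytes are already stop characters
    cr2 &= ~already_covered;
    ToyDoublePlan rv;
    rv.singles = already_covered;
    const size_t product = cr1.count() * cr2.count();
    if (product == 0) {
        return rv; // nothing new to stop on at this position
    }
    if (product > 8) {
        rv.singles |= (cr2.count() < cr1.count()) ? cr2 : cr1;
    } else {
        rv.pairs = product; // the real code emits each (c1, c2) literal
    }
    return rv;
}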
findBestDoubleAccelScheme(vector > paths, + const CharReach &terminating) { + DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n", + terminating.count()); + unifyPathsLastSegment(paths); + AccelScheme curr; + curr.double_cr = terminating; + curr.offset = 0; + /* if there are too many paths, shorten the paths to reduce the number of + * distinct paths we have to consider */ + while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) { + for (auto &p : paths) { + if (p.empty()) { + return curr; + } + p.pop_back(); + } + unifyPathsLastSegment(paths); + } + + if (paths.empty()) { + return curr; + } + + AccelScheme best; + best.double_cr = CharReach::dot(); + findDoubleBest(paths.begin(), paths.end(), curr, &best); + curr = best; + DEBUG_PRINTF("da %zu pairs, %zu singles\n", curr.double_byte.size(), + curr.double_cr.count()); + return curr; +} + AccelScheme findBestAccelScheme(vector > paths, - const CharReach &terminating) { + const CharReach &terminating, + bool look_for_double_byte) { + AccelScheme da; + + if (look_for_double_byte) { + da = findBestDoubleAccelScheme(paths, terminating); + } + improvePaths(paths); DEBUG_PRINTF("we have %zu paths\n", paths.size()); if (paths.size() > 40) { - return AccelScheme(); /* too many paths to explore */ + return da; /* too many paths to explore */ } /* if we were smart we would do something netflowy on the paths to find the @@ -559,13 +512,21 @@ AccelScheme findBestAccelScheme(vector > paths, assert(offset <= best.offset); best.offset = offset; + /* merge best single and best double */ + if (!da.double_byte.empty() && da.double_byte.size() <= 8 + && da.double_cr.count() < best.cr.count()) { + best.double_byte = da.double_byte; + best.double_cr = da.double_cr; + best.double_offset = da.double_offset; + } + return best; } AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, const vector &refined_cr, const map &br_cyclic, - bool allow_wide) { + bool allow_wide, bool look_for_double_byte) { CharReach terminating; for (auto v : verts) { if (!hasSelfLoop(v, g)) { @@ -612,7 +573,8 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, reverse(it->begin(), it->end()); } - return findBestAccelScheme(std::move(paths), terminating); + return findBestAccelScheme(std::move(paths), terminating, + look_for_double_byte); } NFAVertex get_sds_or_proxy(const NGHolder &g) { @@ -903,9 +865,9 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, } } - // Look for one byte accel schemes verm/shufti; + // Look for offset accel schemes verm/shufti; vector verts(1, v); - *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide); + *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true); DEBUG_PRINTF("as width %zu\n", as->cr.count()); return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide; } diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index 113b216c2..80b3f0ecb 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -63,15 +63,6 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, u32 offset, ue2::flat_set *friends); -struct DoubleAccelInfo { - DoubleAccelInfo() : offset(0) {} - u32 offset; //!< offset correction to apply - CharReach stop1; //!< single-byte accel stop literals - flat_set> stop2; //!< double-byte accel stop literals -}; - -DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v); - struct AccelScheme { AccelScheme(const CharReach &cr_in, u32 offset_in) : cr(cr_in), offset(offset_in) { @@ -84,6 +75,36 @@ struct AccelScheme { // Don't use 
ORDER_CHECK as it will (stupidly) eval count() too many // times. + size_t a_dcount = double_cr.count(); + size_t b_dcount = b.double_cr.count(); + + bool feasible_double_a + = !a.double_byte.empty() && a.double_byte.size() <= 8; + bool feasible_double_b + = !b.double_byte.empty() && b.double_byte.size() <= 8; + + if (feasible_double_a != feasible_double_b) { + return feasible_double_a > feasible_double_b; + } + + if (feasible_double_a) { + if (a_dcount != b_dcount) { + return a_dcount < b_dcount; + } + + if ((a.double_byte.size() == 1) != (b.double_byte.size() == 1)) { + return a.double_byte.size() < b.double_byte.size(); + } + + bool cd_a = isCaselessDouble(a.double_byte); + bool cd_b = isCaselessDouble(b.double_byte); + if (cd_a != cd_b) { + return cd_a > cd_b; + } + ORDER_CHECK(double_byte.size()); + ORDER_CHECK(double_offset); + } + const size_t a_count = cr.count(), b_count = b.cr.count(); if (a_count != b_count) { return a_count < b_count; @@ -92,6 +113,9 @@ struct AccelScheme { /* TODO: give bonus if one is a 'caseless' character */ ORDER_CHECK(offset); ORDER_CHECK(cr); + ORDER_CHECK(double_byte); + ORDER_CHECK(double_cr); + ORDER_CHECK(double_offset); return false; } @@ -99,8 +123,11 @@ struct AccelScheme { return b < *this; } + ue2::flat_set > double_byte; CharReach cr; + CharReach double_cr; u32 offset; + u32 double_offset = 0; }; NFAVertex get_sds_or_proxy(const NGHolder &g); @@ -108,12 +135,15 @@ NFAVertex get_sds_or_proxy(const NGHolder &g); AccelScheme nfaFindAccel(const NGHolder &g, const std::vector &verts, const std::vector &refined_cr, const std::map &br_cyclic, - bool allow_wide); + bool allow_wide, bool look_for_double_byte = false); AccelScheme findBestAccelScheme(std::vector > paths, - const CharReach &terminating); + const CharReach &terminating, + bool look_for_double_byte = false); -/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */ +/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). 
If a + * single byte accel scheme is found it is placed into *as + */ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::vector &refined_cr, const std::map &br_cyclic, From b4727cf1ea01df1dc55fee1cf986d00d30f0237d Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 21 Mar 2016 16:19:46 +1100 Subject: [PATCH 138/218] masked version of dverm --- src/nfa/accel.c | 14 ++- src/nfa/accel.h | 9 +- src/nfa/accel_dump.cpp | 6 ++ src/nfa/accelcompile.cpp | 59 +++++++++++ src/nfa/accelcompile.h | 6 +- src/nfa/limex_accel.c | 11 ++- src/nfa/mcclellancompile.cpp | 15 +++ src/nfa/vermicelli.h | 37 ++++++- src/nfa/vermicelli_sse.h | 39 +++++++- src/nfagraph/ng_limex_accel.h | 10 +- unit/internal/vermicelli.cpp | 178 +++++++++++++++++++++++++++++++++- 11 files changed, 371 insertions(+), 13 deletions(-) diff --git a/src/nfa/accel.c b/src/nfa/accel.c index a8fc4e36a..8a8694a87 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -84,6 +84,18 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { c_end - 1); break; + case ACCEL_DVERM_MASKED: + DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end); + if (c + 16 + 1 >= c_end) { + return c; + } + + /* need to stop one early to get an accurate end state */ + rv = vermicelliDoubleMaskedExec(accel->dverm.c1, accel->dverm.c2, + accel->dverm.m1, accel->dverm.m2, + c, c_end - 1); + break; + case ACCEL_SHUFTI: DEBUG_PRINTF("accel shufti %p %p\n", c, c_end); if (c + 15 >= c_end) { diff --git a/src/nfa/accel.h b/src/nfa/accel.h index af0295665..a13563b68 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,7 +87,10 @@ enum AccelType { ACCEL_MSTRUFFLE, ACCEL_MSGTRUFFLE, ACCEL_MDSTRUFFLE, - ACCEL_MDSGTRUFFLE + ACCEL_MDSGTRUFFLE, + /* masked dverm */ + ACCEL_DVERM_MASKED, + }; /** \brief Structure for accel framework. 
*/ @@ -107,6 +110,8 @@ union AccelAux { u8 offset; u8 c1; // uppercase if nocase u8 c2; // uppercase if nocase + u8 m1; // masked variant + u8 m2; // masked variant } dverm; struct { u8 accel_type; diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 9e4fb7e97..40abd12c3 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -66,6 +66,8 @@ const char *accelName(u8 accel_type) { return "double-vermicelli"; case ACCEL_DVERM_NOCASE: return "double-vermicelli nocase"; + case ACCEL_DVERM_MASKED: + return "double-vermicelli masked"; case ACCEL_RVERM: return "reverse vermicelli"; case ACCEL_RVERM_NOCASE: @@ -247,6 +249,10 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { case ACCEL_RDVERM_NOCASE: fprintf(f, " [\\x%02hhx\\x%02hhx]\n", accel.dverm.c1, accel.dverm.c2); break; + case ACCEL_DVERM_MASKED: + fprintf(f, " [\\x%02hhx\\x%02hhx] & [\\x%02hhx\\x%02hhx]\n", + accel.dverm.c1, accel.dverm.c2, accel.dverm.m1, accel.dverm.m2); + break; case ACCEL_SHUFTI: { fprintf(f, "\n"); dumpShuftiMasks(f, accel.shufti.lo, accel.shufti.hi); diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 6f3b6e8a9..a9281c132 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -94,6 +94,48 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs); } +bool buildDvermMask(const flat_set> &escape_set, u8 *m1_out, + u8 *m2_out) { + u8 a1 = 0xff; + u8 a2 = 0xff; + u8 b1 = 0xff; + u8 b2 = 0xff; + + for (const auto &e : escape_set) { + DEBUG_PRINTF("%0hhx %0hhx\n", e.first, e.second); + a1 &= e.first; + b1 &= ~e.first; + a2 &= e.second; + b2 &= ~e.second; + } + + u8 m1 = a1 | b1; + u8 m2 = a2 | b2; + + u32 holes1 = 8 - popcount32(m1); + u32 holes2 = 8 - popcount32(m2); + + DEBUG_PRINTF("aaaa %0hhx %0hhx\n", a1, a2); + DEBUG_PRINTF("bbbb %0hhx %0hhx\n", b1, b2); + DEBUG_PRINTF("mask %0hhx %0hhx\n", m1, m2); + + assert(holes1 <= 8 && holes2 <= 8); + assert(escape_set.size() <= 1U << (holes1 + holes2)); + if (escape_set.size() != 1U << (holes1 + holes2)) { + return false; + } + + if (m1_out) { + *m1_out = m1; + } + if (m2_out) { + *m2_out = m2; + } + + return true; +} + +static bool isCaselessDouble(const flat_set> &stop) { // test for vector containing if (stop.size() != 4) { @@ -148,6 +190,23 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { return; } + if (outs1 == 0) { + u8 m1; + u8 m2; + + if (buildDvermMask(info.double_stop2, &m1, &m2)) { + aux->accel_type = ACCEL_DVERM_MASKED; + aux->dverm.offset = offset; + aux->dverm.c1 = info.double_stop2.begin()->first & m1; + aux->dverm.c2 = info.double_stop2.begin()->second & m2; + aux->dverm.m1 = m1; + aux->dverm.m2 = m2; + DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + return; + } + } + if (outs1 + outs2 <= 8) { if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. 
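buildDvermMask() above derives the two AND masks from the escape pairs: a1/a2 accumulate the bits set in every first/second byte and b1/b2 the bits clear in every one, so m = a | b keeps exactly the positions that are constant across the set. The masks are only faithful when the pair set fills the whole cartesian space of the remaining free bits, which is the 1 << (holes1 + holes2) size check. The same test restated compactly (standalone sketch):

#include <cstddef>
#include <cstdint>
#include <set>
#include <utility>

static unsigned popcount8(uint8_t v) {
    unsigned c = 0;
    for (; v; v &= static_cast<uint8_t>(v - 1)) {
        c++;
    }
    return c;
}

// Can this pair set be matched as ((x & m1) == c1 && (y & m2) == c2)?
static bool maskableDouble(const std::set<std::pair<uint8_t, uint8_t>> &pairs) {
    uint8_t a1 = 0xff, b1 = 0xff, a2 = 0xff, b2 = 0xff;
    for (const auto &e : pairs) {
        a1 &= e.first;  // bits set in every first byte
        b1 &= ~e.first; // bits clear in every first byte
        a2 &= e.second;
        b2 &= ~e.second;
    }
    const uint8_t m1 = a1 | b1; // constant bit positions, byte one
    const uint8_t m2 = a2 | b2; // constant bit positions, byte two
    const unsigned holes =
        (8 - popcount8(m1)) + (8 - popcount8(m2)); // free bit positions
    // Faithful only if no combination of the free bits is missing: the
    // pairs must form the full cross product over those positions.
    return pairs.size() == (size_t{1} << holes);
}

When the test holds, buildAccelDouble() takes c1/c2 from any member of the set masked by m1/m2 and selects ACCEL_DVERM_MASKED.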
DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu" diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h index d479a5457..9b30146cd 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -56,8 +56,6 @@ struct MultibyteAccelInfo { multiaccel_type type = MAT_NONE; }; -bool isCaselessDouble(const flat_set> &stop); - struct AccelInfo { AccelInfo() : single_offset(0U), double_offset(0U), single_stops(CharReach::dot()), @@ -79,6 +77,10 @@ struct AccelInfo { bool buildAccelAux(const AccelInfo &info, AccelAux *aux); +/* returns true is the escape set can be handled with a masked double_verm */ +bool buildDvermMask(const flat_set> &escape_set, + u8 *m1_out = nullptr, u8 *m2_out = nullptr); + } // namespace ue2 #endif diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index 77ed5ac07..0bfc9d85b 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -81,6 +81,15 @@ const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) { ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end); break; + case ACCEL_DVERM_MASKED: + DEBUG_PRINTF("double vermicelli masked for " + "0x%02hhx%02hhx/0x%02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2, + aux->dverm.m1, aux->dverm.m2); + offset = aux->dverm.offset; + ptr = vermicelliDoubleMaskedExec(aux->dverm.c1, aux->dverm.c2, + aux->dverm.m1, aux->dverm.m2, ptr, end); + break; case ACCEL_MLVERM: DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c); offset = aux->mverm.offset; diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 279f454e7..87eed2501 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -29,6 +29,7 @@ #include "mcclellancompile.h" #include "accel.h" +#include "accelcompile.h" #include "grey.h" #include "mcclellan_internal.h" #include "mcclellancompile_accel.h" @@ -239,6 +240,20 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); return; } + + u8 m1; + u8 m2; + if (buildDvermMask(info.outs2, &m1, &m2)) { + accel->accel_type = ACCEL_DVERM_MASKED; + accel->dverm.offset = verify_u8(info.outs2_offset); + accel->dverm.c1 = info.outs2.begin()->first & m1; + accel->dverm.c2 = info.outs2.begin()->second & m2; + accel->dverm.m1 = m1; + accel->dverm.m2 = m2; + DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", + accel->dverm.c1, accel->dverm.c2); + return; + } } if (double_byte_ok(info)) { diff --git a/src/nfa/vermicelli.h b/src/nfa/vermicelli.h index e6957f9f6..36d7fb5fd 100644 --- a/src/nfa/vermicelli.h +++ b/src/nfa/vermicelli.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -185,6 +185,41 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, } } +static really_inline +const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, + const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) " + "over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf)); + assert(buf < 
buf_end); + assert((buf_end - buf) >= VERM_BOUNDARY); + + uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; + VERM_TYPE chars1 = VERM_SET_FN(c1); + VERM_TYPE chars2 = VERM_SET_FN(c2); + VERM_TYPE mask1 = VERM_SET_FN(m1); + VERM_TYPE mask2 = VERM_SET_FN(m2); + + if (min) { + // Input isn't aligned, so we need to run one iteration with an + // unaligned load, then skip buf forward to the next aligned address. + // There's some small overlap here, but we don't mind scanning it twice + // if we can do it quickly, do we? + const u8 *p = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf); + if (p) { + return p; + } + + buf += VERM_BOUNDARY - min; + if (buf >= buf_end) { + return buf_end - 1; + } + } + + // Aligned loops from here on in + return dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, c2, m1, m2, + buf, buf_end); +} + // Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if // character not found. static really_inline diff --git a/src/nfa/vermicelli_sse.h b/src/nfa/vermicelli_sse.h index 1a0415052..0a30306f6 100644 --- a/src/nfa/vermicelli_sse.h +++ b/src/nfa/vermicelli_sse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -172,6 +172,27 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, return buf; } +static really_inline +const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2, + m128 mask1, m128 mask2, u8 c1, u8 c2, u8 m1, + u8 m2, const u8 *buf, const u8 *buf_end) { + assert((size_t)buf % 16 == 0); + + for (; buf + 16 < buf_end; buf += 16) { + m128 data = load128(buf); + u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)), + shiftRight8Bits(eq128(chars2, and128(data, mask2))))); + if ((buf[15] & m1) == c1 && (buf[16] & m2) == c2) { + z |= (1 << 15); + } + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + } + return buf; +} + // returns NULL if not found static really_inline const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) { @@ -205,6 +226,22 @@ const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) { return NULL; } +// returns NULL if not found +static really_inline +const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2, + m128 mask1, m128 mask2, const u8 *buf) { + m128 data = loadu128(buf); // unaligned + u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)), + shiftRight8Bits(eq128(chars2, and128(data, mask2))))); + + /* no fixup of the boundary required - the aligned run will pick it up */ + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + return NULL; +} + static really_inline const u8 *lastMatchOffset(const u8 *buf_end, u32 z) { assert(z); diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index 80b3f0ecb..9c77dc67a 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -96,10 +96,12 @@ struct AccelScheme { return a.double_byte.size() < b.double_byte.size(); } - bool cd_a = isCaselessDouble(a.double_byte); - bool cd_b = isCaselessDouble(b.double_byte); - if (cd_a != cd_b) { - return cd_a > cd_b; + if (!a_dcount) { + bool cd_a = buildDvermMask(a.double_byte); + bool cd_b = buildDvermMask(b.double_byte); + if (cd_a != cd_b) { + return cd_a > cd_b; + } } ORDER_CHECK(double_byte.size()); ORDER_CHECK(double_offset); diff --git a/unit/internal/vermicelli.cpp 
b/unit/internal/vermicelli.cpp index 4442754e2..6866b7c84 100644 --- a/unit/internal/vermicelli.cpp +++ b/unit/internal/vermicelli.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -345,3 +345,179 @@ TEST(NVermicelli, Exec4) { } } +TEST(DoubleVermicelliMasked, ExecNoMatch1) { + std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + const u8 *rv = vermicelliDoubleMaskedExec('a', 'b', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i - j); + + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i - j); + + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() -i - j); + + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff, + t1_raw + i, + t1_raw + t1.length() - i - j); + + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i - j); + + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + } + } +} + +TEST(DoubleVermicelliMasked, Exec1) { + std::string t1("bbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMaskedExec('a', 'b', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('a', 'B', 0xff, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'b', CASE_CLEAR, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('b', 'a', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('B', 'A', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + } +} + +TEST(DoubleVermicelliMasked, Exec2) { + std::string t1("bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMaskedExec('a', 'a', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'A', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('a', 'A', 0xff, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'a', CASE_CLEAR, 0xff, + 
t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); +} +} + +TEST(DoubleVermicelliMasked, Exec3) { + /* 012345678901234567890123 */ + std::string t1("bbbbbbbbbbbbbbbbbaAaaAAaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMaskedExec('A', 'a', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'A', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'A', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 21, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('a', 'A', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('a', 'A', 0xff, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'a', CASE_CLEAR, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); +} +} + +TEST(DoubleVermicelliMasked, Exec4) { + std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 31; i++) { + t1[48 - i] = 'a'; + t1[48 - i + 1] = 'a'; + const u8 *rv = vermicelliDoubleMaskedExec('a', 'a', 0xff, 0xff, t1_raw, + t1_raw + t1.length()); + + ASSERT_EQ((size_t)&t1_raw[48 - i], (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'A', CASE_CLEAR, CASE_CLEAR, t1_raw, + t1_raw + t1.length()); + + ASSERT_EQ((size_t)&t1_raw[48 - i], (size_t)rv); + } +} + From 6c7ee12bb9949b5576247bf620ef25fa8af497df Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 30 Mar 2016 11:14:59 +1100 Subject: [PATCH 139/218] make dverm more precise --- src/nfa/accel.c | 2 ++ src/nfa/vermicelli.h | 32 ++++++++++++++++++++++++++------ src/nfa/vermicelli_sse.h | 9 ++++++--- unit/internal/vermicelli.cpp | 23 ++++++++++------------- 4 files changed, 44 insertions(+), 22 deletions(-) diff --git a/src/nfa/accel.c b/src/nfa/accel.c index 8a8694a87..99eab11dc 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -357,5 +357,7 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { rv = MAX(c + accel->generic.offset, rv); rv -= accel->generic.offset; + DEBUG_PRINTF("advanced %zd\n", rv - c); + return rv; } diff --git a/src/nfa/vermicelli.h b/src/nfa/vermicelli.h index 36d7fb5fd..ba8afcf1d 100644 --- a/src/nfa/vermicelli.h +++ b/src/nfa/vermicelli.h @@ -178,11 +178,21 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, } // Aligned loops from here on in - if (nocase) { - return dvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end); - } else { - return dvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end); + const u8 *ptr = nocase ? dvermSearchAlignedNocase(chars1, chars2, c1, c2, + buf, buf_end) + : dvermSearchAligned(chars1, chars2, c1, c2, buf, + buf_end); + if (ptr) { + return ptr; } + + // Tidy up the mess at the end + ptr = nocase ? dvermPreconditionNocase(chars1, chars2, + buf_end - VERM_BOUNDARY) + : dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY); + /* buf_end - 1 to be conservative in case last byte is a partial match */ + return ptr ? 
ptr : buf_end - 1; + } static really_inline @@ -216,8 +226,18 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, } // Aligned loops from here on in - return dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, c2, m1, m2, - buf, buf_end); + const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, + c2, m1, m2, buf, buf_end); + if (ptr) { + return ptr; + } + + // Tidy up the mess at the end + ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2, + buf_end - VERM_BOUNDARY); + /* buf_end - 1 to be conservative in case last byte is a partial match */ + return ptr ? ptr : buf_end - 1; + } // Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if diff --git a/src/nfa/vermicelli_sse.h b/src/nfa/vermicelli_sse.h index 0a30306f6..1883a44cf 100644 --- a/src/nfa/vermicelli_sse.h +++ b/src/nfa/vermicelli_sse.h @@ -147,7 +147,8 @@ const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, return buf + pos; } } - return buf; + + return NULL; } static really_inline @@ -169,7 +170,8 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, return buf + pos; } } - return buf; + + return NULL; } static really_inline @@ -190,7 +192,8 @@ const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2, return buf + pos; } } - return buf; + + return NULL; } // returns NULL if not found diff --git a/unit/internal/vermicelli.cpp b/unit/internal/vermicelli.cpp index 6866b7c84..5d66a3325 100644 --- a/unit/internal/vermicelli.cpp +++ b/unit/internal/vermicelli.cpp @@ -31,8 +31,6 @@ #include "gtest/gtest.h" #include "nfa/vermicelli.h" -#define BOUND (~(VERM_BOUNDARY - 1)) - TEST(Vermicelli, ExecNoMatch1) { char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -128,27 +126,27 @@ TEST(DoubleVermicelli, ExecNoMatch1) { const u8 *rv = vermicelliDoubleExec('a', 'b', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); rv = vermicelliDoubleExec('B', 'b', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); rv = vermicelliDoubleExec('A', 'B', 1, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); rv = vermicelliDoubleExec('b', 'B', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); rv = vermicelliDoubleExec('B', 'A', 1, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); } } } @@ -355,31 +353,30 @@ TEST(DoubleVermicelliMasked, ExecNoMatch1) { t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); - + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR, t1_raw + i, t1_raw + t1.length() -i - j); - ASSERT_EQ(((size_t)t1_raw + 
t1.length() - i - j - 1) & BOUND, (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); } } } From c0a5b037a1948d888e232b9b213abc45b0b2cc39 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 4 Apr 2016 11:02:59 +1000 Subject: [PATCH 140/218] allow double shufti to share buckets --- src/nfa/shufticompile.cpp | 97 ++++++++++++++++++-------- src/nfa/shufticompile.h | 6 +- unit/internal/shufti.cpp | 141 ++++++++++++++++++++++++++++++++------ 3 files changed, 194 insertions(+), 50 deletions(-) diff --git a/src/nfa/shufticompile.cpp b/src/nfa/shufticompile.cpp index 05072a44d..f909a0b8c 100644 --- a/src/nfa/shufticompile.cpp +++ b/src/nfa/shufticompile.cpp @@ -32,6 +32,7 @@ #include "shufticompile.h" #include "ue2common.h" #include "util/charreach.h" +#include "util/container.h" #include "util/ue2_containers.h" #include @@ -107,13 +108,35 @@ int shuftiBuildMasks(const CharReach &c, m128 *lo, m128 *hi) { return bit_index; } -void shuftiBuildDoubleMasks(const CharReach &onechar, +static +array or_array(array a, const array &b) { + a[0] |= b[0]; + a[1] |= b[1]; + a[2] |= b[2]; + a[3] |= b[3]; + + return a; +} + + +#define MAX_BUCKETS 8 +static +void set_buckets_from_mask(u16 nibble_mask, u32 bucket, + array &byte_mask) { + assert(bucket < MAX_BUCKETS); + + u32 mask = nibble_mask; + while (mask) { + u32 n = findAndClearLSB_32(&mask); + byte_mask[n] &= ~(1 << bucket); + } +} + +bool shuftiBuildDoubleMasks(const CharReach &onechar, const flat_set> &twochar, m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2) { DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(), twochar.size()); - assert(onechar.count() + twochar.size() <= 8); - array lo1_a; array lo2_a; array hi1_a; @@ -124,43 +147,63 @@ void shuftiBuildDoubleMasks(const CharReach &onechar, hi1_a.fill(0xff); hi2_a.fill(0xff); - u32 i = 0; - // two-byte literals - for (flat_set>::const_iterator it = twochar.begin(); - it != twochar.end(); ++it, i++) { - DEBUG_PRINTF("%u: %02hhx %02hhx\n", i, it->first, it->second); - u8 b1 = it->first & 0xf; - u8 t1 = it->first >> 4; - u8 b2 = it->second & 0xf; - u8 t2 = it->second >> 4; - - lo1_a[b1] &= ~(1 << i); - hi1_a[t1] &= ~(1 << i); - lo2_a[b2] &= ~(1 << i); - hi2_a[t2] &= ~(1 << i); + vector> nibble_masks; + for (const auto &p : twochar) { + DEBUG_PRINTF("%02hhx %02hhx\n", p.first, p.second); + u16 a_lo = 1U << (p.first & 0xf); + u16 a_hi = 1U << (p.first >> 4); + u16 b_lo = 1U << (p.second & 0xf); + u16 b_hi = 1U << (p.second >> 4); + nibble_masks.push_back({a_lo, a_hi, b_lo, b_hi}); } // one-byte literals (second byte is a wildcard) for (size_t it = onechar.find_first(); it != CharReach::npos; - it = onechar.find_next(it), i++) { - DEBUG_PRINTF("%u: %02hhx\n", i, (u8)it); - u8 b1 = it & 0xf; - u8 t1 = it >> 4; - - lo1_a[b1] &= ~(1 << i); - hi1_a[t1] &= ~(1 << i); + it = onechar.find_next(it)) { + DEBUG_PRINTF("%02hhx\n", (u8)it); + nibble_masks.push_back({(u16)(1U << (it & 0xf)), (u16)(1U << (it >> 4)), + 0xffff, 0xffff}); + } - for (int j = 0; j < 16; j++) { - lo2_a[j] &= ~(1 << i); - hi2_a[j] &= 
~(1 << i); + // try to merge strings into shared buckets + for (u32 i = 0; i < 4; i++) { + map, array> new_masks; + for (const auto &a : nibble_masks) { + auto key = a; + key[i] = 0; + if (!contains(new_masks, key)) { + new_masks[key] = a; + } else { + new_masks[key] = or_array(new_masks[key], a); + } } + nibble_masks.clear(); + for (const auto &e : new_masks) { + nibble_masks.push_back(e.second); + } + } + + if (nibble_masks.size() > MAX_BUCKETS) { + DEBUG_PRINTF("too many buckets needed (%zu)\n", nibble_masks.size()); + return false; + } + + u32 i = 0; + for (const auto &a : nibble_masks) { + set_buckets_from_mask(a[0], i, lo1_a); + set_buckets_from_mask(a[1], i, hi1_a); + set_buckets_from_mask(a[2], i, lo2_a); + set_buckets_from_mask(a[3], i, hi2_a); + i++; } memcpy(lo1, lo1_a.data(), sizeof(m128)); memcpy(lo2, lo2_a.data(), sizeof(m128)); memcpy(hi1, hi1_a.data(), sizeof(m128)); memcpy(hi2, hi2_a.data(), sizeof(m128)); + + return true; } #ifdef DUMP_SUPPORT diff --git a/src/nfa/shufticompile.h b/src/nfa/shufticompile.h index 2795b73a4..59126b0b5 100644 --- a/src/nfa/shufticompile.h +++ b/src/nfa/shufticompile.h @@ -50,7 +50,11 @@ namespace ue2 { */ int shuftiBuildMasks(const CharReach &chars, m128 *lo, m128 *hi); -void shuftiBuildDoubleMasks(const CharReach &onechar, +/** \brief Double-byte variant + * + * Returns false if we are unable to build the masks (too many buckets required) + */ +bool shuftiBuildDoubleMasks(const CharReach &onechar, const flat_set> &twochar, m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2); diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp index ed48ad5cd..b8d77d374 100644 --- a/unit/internal/shufti.cpp +++ b/unit/internal/shufti.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -283,7 +283,9 @@ TEST(DoubleShufti, BuildMask1) { lits.insert(make_pair('a', 'B')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -324,7 +326,9 @@ TEST(DoubleShufti, BuildMask2) { lits.insert(make_pair('a','z')); lits.insert(make_pair('B','z')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -350,7 +354,9 @@ TEST(DoubleShufti, BuildMask4) { lits.insert(make_pair('A','z')); lits.insert(make_pair('b','z')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -377,7 +383,9 @@ TEST(DoubleShufti, BuildMask5) { CharReach bytes; bytes.set('X'); - shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m, &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -395,6 +403,81 @@ TEST(DoubleShufti, BuildMask5) { lo1['B' % 16] | hi1['B' >> 4] | lo2['X' % 16] | hi2['X' >> 4]); } +TEST(DoubleShufti, BuildMask6) { + m128 lo1m, hi1m, lo2m, hi2m; + + flat_set> lits; + + lits.insert(make_pair('a','z')); + lits.insert(make_pair('B','z')); + 
lits.insert(make_pair('A','z')); + lits.insert(make_pair('b','z')); + lits.insert(make_pair('a','y')); + lits.insert(make_pair('B','y')); + lits.insert(make_pair('A','y')); + lits.insert(make_pair('b','y')); + lits.insert(make_pair('a','x')); + lits.insert(make_pair('B','x')); + lits.insert(make_pair('A','x')); + lits.insert(make_pair('b','x')); + + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); + + u8 *lo1 = (u8 *)&lo1m; + u8 *lo2 = (u8 *)&lo2m; + u8 *hi1 = (u8 *)&hi1m; + u8 *hi2 = (u8 *)&hi2m; + ASSERT_NE(0xff, + lo1['a' % 16] | hi1['a' >> 4] | lo2['z' % 16] | hi2['z' >> 4]); + ASSERT_NE(0xff, + lo1['A' % 16] | hi1['A' >> 4] | lo2['z' % 16] | hi2['z' >> 4]); + ASSERT_NE(0xff, + lo1['b' % 16] | hi1['b' >> 4] | lo2['z' % 16] | hi2['z' >> 4]); + ASSERT_NE(0xff, + lo1['B' % 16] | hi1['B' >> 4] | lo2['z' % 16] | hi2['z' >> 4]); + ASSERT_NE(0xff, + lo1['a' % 16] | hi1['a' >> 4] | lo2['y' % 16] | hi2['y' >> 4]); + ASSERT_NE(0xff, + lo1['A' % 16] | hi1['A' >> 4] | lo2['y' % 16] | hi2['y' >> 4]); + ASSERT_NE(0xff, + lo1['b' % 16] | hi1['b' >> 4] | lo2['y' % 16] | hi2['y' >> 4]); + ASSERT_NE(0xff, + lo1['B' % 16] | hi1['B' >> 4] | lo2['y' % 16] | hi2['y' >> 4]); + ASSERT_NE(0xff, + lo1['a' % 16] | hi1['a' >> 4] | lo2['x' % 16] | hi2['x' >> 4]); + ASSERT_NE(0xff, + lo1['A' % 16] | hi1['A' >> 4] | lo2['x' % 16] | hi2['x' >> 4]); + ASSERT_NE(0xff, + lo1['b' % 16] | hi1['b' >> 4] | lo2['x' % 16] | hi2['x' >> 4]); + ASSERT_NE(0xff, + lo1['B' % 16] | hi1['B' >> 4] | lo2['x' % 16] | hi2['x' >> 4]); +} + +TEST(DoubleShufti, BuildMask7) { + m128 lo1m, hi1m, lo2m, hi2m; + + flat_set> lits; + + lits.insert(make_pair('a','b')); + lits.insert(make_pair('c','d')); + lits.insert(make_pair('e','f')); + lits.insert(make_pair('g','h')); + lits.insert(make_pair('i','j')); + lits.insert(make_pair('k','l')); + lits.insert(make_pair('m','n')); + lits.insert(make_pair('o','p')); + lits.insert(make_pair('q','r')); + lits.insert(make_pair('s','t')); + lits.insert(make_pair('u','v')); + lits.insert(make_pair('w','x')); + + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_FALSE(rv); +} + TEST(DoubleShufti, ExecNoMatch1) { m128 lo1, hi1, lo2, hi2; @@ -402,7 +485,9 @@ TEST(DoubleShufti, ExecNoMatch1) { lits.insert(make_pair('a','b')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, + &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -421,7 +506,8 @@ TEST(DoubleShufti, ExecNoMatch1b) { lits.insert(make_pair('b','a')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -441,7 +527,8 @@ TEST(DoubleShufti, ExecNoMatch2) { lits.insert(make_pair('a','b')); lits.insert(make_pair('B','b')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -461,7 +548,8 @@ TEST(DoubleShufti, ExecNoMatch2b) { lits.insert(make_pair('b','a')); lits.insert(make_pair('b','B')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + 
ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -480,7 +568,8 @@ TEST(DoubleShufti, ExecNoMatch3) { lits.insert(make_pair('V','e')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -499,7 +588,8 @@ TEST(DoubleShufti, ExecNoMatch3b) { lits.insert(make_pair('e','V')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -518,7 +608,8 @@ TEST(DoubleShufti, ExecMatch1) { lits.insert(make_pair('a','b')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -538,7 +629,8 @@ TEST(DoubleShufti, ExecMatch2) { lits.insert(make_pair('a','a')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -559,7 +651,8 @@ TEST(DoubleShufti, ExecMatch3) { lits.insert(make_pair('B','a')); lits.insert(make_pair('a','a')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbBaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -582,8 +675,8 @@ TEST(DoubleShufti, ExecMatch4) { lits.insert(make_pair('C','a')); lits.insert(make_pair('c','a')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -624,8 +717,8 @@ TEST(DoubleShufti, ExecMatch4b) { lits.insert(make_pair('a','C')); lits.insert(make_pair('a','c')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaAaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -663,7 +756,8 @@ TEST(DoubleShufti, ExecMatch5) { lits.insert(make_pair('a','A')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -686,7 +780,8 @@ TEST(DoubleShufti, ExecMatchMixed1) { // just one one-byte literal onebyte.set('a'); - shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -709,7 +804,8 @@ TEST(DoubleShufti, ExecMatchMixed2) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - 
shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; char t2[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -742,7 +838,8 @@ TEST(DoubleShufti, ExecMatchMixed3) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); const int len = 420; char t1[len + 1]; From ed3ef5b997a411d8b60e574992e7189cd6f24be5 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 4 Apr 2016 15:54:09 +1000 Subject: [PATCH 141/218] raise the limit of strings in double shufti --- src/nfa/accelcompile.cpp | 19 ++++++++----------- src/nfa/mcclellancompile.cpp | 11 ++++------- src/nfa/mcclellancompile_accel.cpp | 1 - src/nfagraph/ng_limex_accel.cpp | 7 ++++--- src/nfagraph/ng_limex_accel.h | 10 ++++++---- 5 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index a9281c132..75960dda0 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -207,17 +207,14 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { } } - if (outs1 + outs2 <= 8) { - if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. - DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu" - " two-byte literals\n", outs1, outs2); - aux->accel_type = ACCEL_DSHUFTI; - aux->dshufti.offset = offset; - shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2, - &aux->dshufti.lo1, - &aux->dshufti.hi1, - &aux->dshufti.lo2, - &aux->dshufti.hi2); + if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. 
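/* Aside, not part of the patch: the "outs1 + outs2 <= 8" guard deleted just
 * above can go because, as of the previous patch, shuftiBuildDoubleMasks()
 * merges two-byte literals into shared buckets and itself returns false when
 * more than eight buckets would still be required, so the caller now simply
 * attempts the build and falls through to the other schemes on failure. */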
+ DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu" + " two-byte literals\n", outs1, outs2); + aux->accel_type = ACCEL_DSHUFTI; + aux->dshufti.offset = offset; + if (shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2, + &aux->dshufti.lo1, &aux->dshufti.hi1, + &aux->dshufti.lo2, &aux->dshufti.hi2)) { return; } } diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 87eed2501..b44187309 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -130,7 +130,6 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) { static bool double_byte_ok(const escape_info &info) { return !info.outs2_broken - && info.outs2_single.count() + info.outs2.size() <= 8 && info.outs2_single.count() < info.outs2.size() && info.outs2_single.count() <= 2 && !info.outs2.empty(); } @@ -256,14 +255,12 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } } - if (double_byte_ok(info)) { + if (double_byte_ok(info) + && shuftiBuildDoubleMasks(info.outs2_single, info.outs2, + &accel->dshufti.lo1, &accel->dshufti.hi1, + &accel->dshufti.lo2, &accel->dshufti.hi2)) { accel->accel_type = ACCEL_DSHUFTI; accel->dshufti.offset = verify_u8(info.outs2_offset); - shuftiBuildDoubleMasks(info.outs2_single, info.outs2, - &accel->dshufti.lo1, - &accel->dshufti.hi1, - &accel->dshufti.lo2, - &accel->dshufti.hi2); DEBUG_PRINTF("state %hu is double shufti\n", this_idx); return; } diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp index 471d0d53b..3e73d31d4 100644 --- a/src/nfa/mcclellancompile_accel.cpp +++ b/src/nfa/mcclellancompile_accel.cpp @@ -334,7 +334,6 @@ map populateAccelerationInfo(const raw_dfa &rdfa, static bool double_byte_ok(const escape_info &info) { return !info.outs2_broken - && info.outs2_single.count() + info.outs2.size() <= 8 && info.outs2_single.count() < info.outs2.size() && info.outs2_single.count() <= 2 && !info.outs2.empty(); } diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 63ec546ba..8509b36ff 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -284,7 +284,7 @@ AccelScheme make_double_accel(AccelScheme as, CharReach cr_1, return as; } - if (two_count > 8) { + if (two_count > DOUBLE_SHUFTI_LIMIT) { if (cr_2.count() < cr_1.count()) { as.double_cr |= cr_2; offset = offset_in + 1; @@ -513,7 +513,7 @@ AccelScheme findBestAccelScheme(vector > paths, best.offset = offset; /* merge best single and best double */ - if (!da.double_byte.empty() && da.double_byte.size() <= 8 + if (!da.double_byte.empty() && da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT && da.double_cr.count() < best.cr.count()) { best.double_byte = da.double_byte; best.double_cr = da.double_cr; @@ -857,7 +857,8 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, // literals) if (depth > 1) { for (unsigned int i = 0; i < (depth - 1); i++) { - if (depthReach[i].count()*depthReach[i+1].count() <= 8) { + if (depthReach[i].count() * depthReach[i+1].count() + <= DOUBLE_SHUFTI_LIMIT) { DEBUG_PRINTF("two-byte shufti, depth %u\n", i); *as = AccelScheme(CharReach::dot(), i); return true; diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index 9c77dc67a..16a6b770b 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -63,6 +63,8 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, u32 offset, ue2::flat_set *friends); +#define DOUBLE_SHUFTI_LIMIT 20 + struct AccelScheme { AccelScheme(const CharReach &cr_in, u32 offset_in) : cr(cr_in), 
offset(offset_in) { @@ -78,10 +80,10 @@ struct AccelScheme { size_t a_dcount = double_cr.count(); size_t b_dcount = b.double_cr.count(); - bool feasible_double_a - = !a.double_byte.empty() && a.double_byte.size() <= 8; - bool feasible_double_b - = !b.double_byte.empty() && b.double_byte.size() <= 8; + bool feasible_double_a = !a.double_byte.empty() + && a.double_byte.size() <= DOUBLE_SHUFTI_LIMIT; + bool feasible_double_b = !b.double_byte.empty() + && b.double_byte.size() <= DOUBLE_SHUFTI_LIMIT; if (feasible_double_a != feasible_double_b) { return feasible_double_a > feasible_double_b; From ff82ea6d6e36be28bda536d03ccc2e3a47bd7ed4 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 5 Apr 2016 11:07:26 +1000 Subject: [PATCH 142/218] smallwrite: don't recompile the dfa if prune fails --- src/nfa/mcclellancompile_util.cpp | 6 ++++-- src/nfa/mcclellancompile_util.h | 8 +++++++- src/smallwrite/smallwrite_build.cpp | 24 +++++++++++++----------- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index 2c9465208..234574d84 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -228,13 +228,13 @@ void calc_min_dist_to_accept(const raw_dfa &raw, } } -void prune_overlong(raw_dfa &raw, u32 max_offset) { +bool prune_overlong(raw_dfa &raw, u32 max_offset) { DEBUG_PRINTF("pruning to at most %u\n", max_offset); vector bob_dist; u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist); if (max_min_dist_bob <= max_offset) { - return; + return false; } vector > in_edges; @@ -282,6 +282,8 @@ void prune_overlong(raw_dfa &raw, u32 max_offset) { /* update specials */ raw.start_floating = new_ids[raw.start_floating]; raw.start_anchored = new_ids[raw.start_anchored]; + + return true; } set all_reports(const raw_dfa &rdfa) { diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index 7015893b5..7b6c033a9 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -37,7 +37,13 @@ namespace ue2 { u32 remove_leading_dots(raw_dfa &raw); -void prune_overlong(raw_dfa &raw, u32 max_offset); + +/** + * Prunes any states which cannot be reached within max_offset from start of + * stream. 
Returns false if no changes are made to the rdfa + */ +bool prune_overlong(raw_dfa &raw, u32 max_offset); + std::set all_reports(const raw_dfa &rdfa); bool has_eod_accepts(const raw_dfa &rdfa); bool has_non_eod_accepts(const raw_dfa &rdfa); diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 097e04320..dfefe5e8d 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -317,21 +317,23 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, } if (is_slow(rdfa, accel_states, roseQuality)) { + DEBUG_PRINTF("is slow\n"); *small_region = cc.grey.smallWriteLargestBufferBad; if (*small_region <= *start_offset) { return nullptr; } - prune_overlong(rdfa, *small_region - *start_offset); - if (rdfa.start_anchored == DEAD_STATE) { - DEBUG_PRINTF("all patterns pruned out\n"); - return nullptr; - } + if (prune_overlong(rdfa, *small_region - *start_offset)) { + if (rdfa.start_anchored == DEAD_STATE) { + DEBUG_PRINTF("all patterns pruned out\n"); + return nullptr; + } - nfa = mcclellanCompile(rdfa, cc, &accel_states); - if (!nfa) { - DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); - assert(0); /* we were able to build orig dfa but not the trimmed? */ - return nullptr; + nfa = mcclellanCompile(rdfa, cc, &accel_states); + if (!nfa) { + DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); + assert(0); /* able to build orig dfa but not the trimmed? */ + return nullptr; + } } } else { *small_region = cc.grey.smallWriteLargestBuffer; From 850636dbd6ce636877edc66cd2d6e4fa0a57e456 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 5 Apr 2016 14:31:13 +1000 Subject: [PATCH 143/218] more efficent accel path discovery --- src/nfagraph/ng_limex_accel.cpp | 251 +++++++++++++++++++++----------- src/nfagraph/ng_limex_accel.h | 68 +-------- 2 files changed, 170 insertions(+), 149 deletions(-) diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 8509b36ff..7bad1c669 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -190,83 +190,148 @@ void findPaths(const NGHolder &g, NFAVertex v, } } -static -AccelScheme merge(AccelScheme a, const AccelScheme &b) { - a.cr |= b.cr; - ENSURE_AT_LEAST(&a.offset, b.offset); - a.double_cr |= b.double_cr; - insert(&a.double_byte, b.double_byte); - ENSURE_AT_LEAST(&a.double_offset, b.double_offset); - return a; -} +struct SAccelScheme { + SAccelScheme(const CharReach &cr_in, u32 offset_in) + : cr(cr_in), offset(offset_in) { + assert(offset <= MAX_ACCEL_DEPTH); + } + + SAccelScheme() {} + + bool operator<(const SAccelScheme &b) const { + const SAccelScheme &a = *this; + + const size_t a_count = cr.count(), b_count = b.cr.count(); + if (a_count != b_count) { + return a_count < b_count; + } + + /* TODO: give bonus if one is a 'caseless' character */ + ORDER_CHECK(offset); + ORDER_CHECK(cr); + return false; + } + + CharReach cr = CharReach::dot(); + u32 offset = MAX_ACCEL_DEPTH + 1; +}; static void findBest(vector >::const_iterator pb, vector >::const_iterator pe, - const AccelScheme &curr, AccelScheme *best) { + const SAccelScheme &curr, SAccelScheme *best) { assert(curr.offset <= MAX_ACCEL_DEPTH); DEBUG_PRINTF("paths left %zu\n", pe - pb); if (pb == pe) { + if (curr 
< *best) { + DEBUG_PRINTF("new best\n"); + *best = curr; + } *best = curr; return; } DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - vector priority_path; + vector priority_path; + priority_path.reserve(pb->size()); u32 i = 0; for (vector::const_iterator p = pb->begin(); p != pb->end(); ++p, i++) { - priority_path.push_back(AccelScheme(*p & ~curr.cr, i)); + SAccelScheme as(*p | curr.cr, MAX(i, curr.offset)); + if (*best < as) { + DEBUG_PRINTF("worse\n"); + continue; + } + priority_path.push_back(move(as)); } sort(priority_path.begin(), priority_path.end()); - for (vector::iterator it = priority_path.begin(); - it != priority_path.end(); ++it) { - vector::iterator jt = it + 1; + for (auto it = priority_path.begin(); it != priority_path.end(); ++it) { + auto jt = next(it); for (; jt != priority_path.end(); ++jt) { if (!it->cr.isSubsetOf(jt->cr)) { break; } } - priority_path.erase(it + 1, jt); + priority_path.erase(next(it), jt); DEBUG_PRINTF("||%zu\n", it->cr.count()); } DEBUG_PRINTF("---\n"); - for (vector::const_iterator it = priority_path.begin(); + for (vector::const_iterator it = priority_path.begin(); it != priority_path.end(); ++it) { DEBUG_PRINTF("%u:|| = %zu; p remaining len %zu\n", i, it->cr.count(), priority_path.end() - it); - AccelScheme in = merge(curr, *it); + SAccelScheme in = move(*it); - if (in > *best) { + if (*best < in) { DEBUG_PRINTF("worse\n"); continue; } - AccelScheme temp = *best; - findBest(pb + 1, pe, in, &temp); - if (temp < *best) { - DEBUG_PRINTF("new best\n"); - *best = temp; - if (curr.cr == best->cr) { - return; /* could only get better by offset */ - } + findBest(pb + 1, pe, in, best); + + if (curr.cr == best->cr) { + return; /* could only get better by offset */ } } } +struct DAccelScheme { + DAccelScheme(const CharReach &cr_in, u32 offset_in) + : double_cr(cr_in), double_offset(offset_in) { + assert(double_offset <= MAX_ACCEL_DEPTH); + } + + DAccelScheme() {} + + bool operator<(const DAccelScheme &b) const { + const DAccelScheme &a = *this; + + size_t a_dcount = a.double_cr.count(); + size_t b_dcount = b.double_cr.count(); + + assert(!a.double_byte.empty() || a_dcount || a.double_offset); + assert(!b.double_byte.empty() || b_dcount || b.double_offset); + + if (a_dcount != b_dcount) { + return a_dcount < b_dcount; + } + + if (!a_dcount) { + bool cd_a = buildDvermMask(a.double_byte); + bool cd_b = buildDvermMask(b.double_byte); + if (cd_a != cd_b) { + return cd_a > cd_b; + } + } + + ORDER_CHECK(double_byte.size()); + ORDER_CHECK(double_offset); + + /* TODO: give bonus if one is a 'caseless' character */ + ORDER_CHECK(double_byte); + ORDER_CHECK(double_cr); + + return false; + } + + ue2::flat_set > double_byte; + CharReach double_cr; + u32 double_offset = 0; +}; + static -AccelScheme make_double_accel(AccelScheme as, CharReach cr_1, - const CharReach &cr_2_in, u32 offset_in) { +DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, + const CharReach &cr_2_in, u32 offset_in) { cr_1 &= ~as.double_cr; CharReach cr_2 = cr_2_in & ~as.double_cr; u32 offset = offset_in; if (cr_1.none()) { DEBUG_PRINTF("empty first element\n"); - as.double_offset = offset; + ENSURE_AT_LEAST(&as.double_offset, offset); return as; } @@ -280,7 +345,7 @@ AccelScheme make_double_accel(AccelScheme as, CharReach cr_1, if (!two_count) { DEBUG_PRINTF("empty element\n"); - as.double_offset = offset; + ENSURE_AT_LEAST(&as.double_offset, offset); return as; } @@ -296,63 +361,69 @@ AccelScheme make_double_accel(AccelScheme as, CharReach cr_1, i = cr_1.find_next(i)) { for (auto j 
= cr_2.find_first(); j != CharReach::npos; j = cr_2.find_next(j)) { - as.double_byte.insert(make_pair(i, j)); + as.double_byte.emplace(i, j); } } } - as.double_offset = offset; + ENSURE_AT_LEAST(&as.double_offset, offset); DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n", - as.double_byte.size(), as.double_cr.count(), as.offset); + as.double_byte.size(), as.double_cr.count(), as.double_offset); return as; } static void findDoubleBest(vector >::const_iterator pb, vector >::const_iterator pe, - const AccelScheme &curr, AccelScheme *best) { - assert(curr.offset <= MAX_ACCEL_DEPTH); + const DAccelScheme &curr, DAccelScheme *best) { + assert(curr.double_offset <= MAX_ACCEL_DEPTH); DEBUG_PRINTF("paths left %zu\n", pe - pb); + DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n", + curr.double_byte.size(), curr.double_cr.count(), + curr.double_offset); if (pb == pe) { - *best = curr; + if (curr < *best) { + *best = curr; + DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n", + best->double_byte.size(), best->double_cr.count(), + best->double_offset); + } return; } DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - vector priority_path; + vector priority_path; + priority_path.reserve(pb->size()); u32 i = 0; for (vector::const_iterator p = pb->begin(); p != pb->end() && next(p) != pb->end(); ++p, i++) { - priority_path.push_back(make_double_accel(curr, *p, *next(p), i)); + DAccelScheme as = make_double_accel(curr, *p, *next(p), i); + if (*best < as) { + DEBUG_PRINTF("worse\n"); + continue; + } + priority_path.push_back(move(as)); } sort(priority_path.begin(), priority_path.end()); - + DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size()); DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n", best->double_byte.size(), best->double_cr.count(), - best->offset); + best->double_offset); - for (vector::const_iterator it = priority_path.begin(); + for (vector::const_iterator it = priority_path.begin(); it != priority_path.end(); ++it) { - - AccelScheme in = merge(curr, *it); + DAccelScheme in = move(*it); DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", - in.double_byte.size(), in.double_cr.count(), in.offset); - - if (in > *best) { + in.double_byte.size(), in.double_cr.count(), + in.double_offset); + if (*best < in) { DEBUG_PRINTF("worse\n"); continue; } - AccelScheme temp = *best; - findDoubleBest(pb + 1, pe, in, &temp); - if (temp < *best) { - *best = temp; - DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n", - best->double_byte.size(), best->double_cr.count(), - best->offset); - } + findDoubleBest(pb + 1, pe, in, best); } } @@ -439,20 +510,23 @@ void improvePaths(vector > &paths) { #define MAX_DOUBLE_ACCEL_PATHS 10 static -AccelScheme findBestDoubleAccelScheme(vector > paths, - const CharReach &terminating) { +DAccelScheme findBestDoubleAccelScheme(vector > paths, + const CharReach &terminating) { DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n", terminating.count()); unifyPathsLastSegment(paths); - AccelScheme curr; - curr.double_cr = terminating; - curr.offset = 0; + +#ifdef DEBUG + DEBUG_PRINTF("paths:\n"); + dumpPaths(paths); +#endif + /* if there are too many paths, shorten the paths to reduce the number of * distinct paths we have to consider */ while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) { for (auto &p : paths) { if (p.empty()) { - return curr; + return DAccelScheme(terminating, 0U); } p.pop_back(); } @@ -460,39 +534,44 @@ AccelScheme findBestDoubleAccelScheme(vector > paths, } if 
(paths.empty()) { - return curr; + return DAccelScheme(terminating, 0U); } - AccelScheme best; - best.double_cr = CharReach::dot(); + DAccelScheme curr(terminating, 0U); + DAccelScheme best(CharReach::dot(), 0U); findDoubleBest(paths.begin(), paths.end(), curr, &best); - curr = best; - DEBUG_PRINTF("da %zu pairs, %zu singles\n", curr.double_byte.size(), - curr.double_cr.count()); - return curr; + DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(), + best.double_cr.count()); + return best; } +#define MAX_EXPLORE_PATHS 40 + AccelScheme findBestAccelScheme(vector > paths, const CharReach &terminating, bool look_for_double_byte) { - AccelScheme da; - + AccelScheme rv; if (look_for_double_byte) { - da = findBestDoubleAccelScheme(paths, terminating); + DAccelScheme da = findBestDoubleAccelScheme(paths, terminating); + if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) { + rv.double_byte = move(da.double_byte); + rv.double_cr = move(da.double_cr); + rv.double_offset = da.double_offset; + } } improvePaths(paths); DEBUG_PRINTF("we have %zu paths\n", paths.size()); - if (paths.size() > 40) { - return da; /* too many paths to explore */ + if (paths.size() > MAX_EXPLORE_PATHS) { + return rv; /* too many paths to explore */ } /* if we were smart we would do something netflowy on the paths to find the * best cut. But we aren't, so we will just brute force it. */ - AccelScheme curr(terminating, 0U); - AccelScheme best; + SAccelScheme curr(terminating, 0U); + SAccelScheme best; findBest(paths.begin(), paths.end(), curr, &best); /* find best is a bit lazy in terms of minimising the offset, see if we can @@ -512,15 +591,13 @@ AccelScheme findBestAccelScheme(vector > paths, assert(offset <= best.offset); best.offset = offset; - /* merge best single and best double */ - if (!da.double_byte.empty() && da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT - && da.double_cr.count() < best.cr.count()) { - best.double_byte = da.double_byte; - best.double_cr = da.double_cr; - best.double_offset = da.double_offset; + rv.offset = best.offset; + rv.cr = best.cr; + if (rv.cr.count() < rv.double_cr.count()) { + rv.double_byte.clear(); } - return best; + return rv; } AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, @@ -832,7 +909,9 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, for (unsigned int i = 0; i < depth; i++) { if (depthReach[i].none()) { DEBUG_PRINTF("red tape acceleration engine depth %u\n", i); - *as = AccelScheme(CharReach(), i); + *as = AccelScheme(); + as->offset = i; + as->cr = CharReach(); return true; } } @@ -847,7 +926,8 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, || (cra.count() == 2 && crb.count() == 2 && cra.isBit5Insensitive() && crb.isBit5Insensitive())) { DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i); - *as = AccelScheme(CharReach::dot(), i); + *as = AccelScheme(); + as->offset = i; return true; } } @@ -860,7 +940,8 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, if (depthReach[i].count() * depthReach[i+1].count() <= DOUBLE_SHUFTI_LIMIT) { DEBUG_PRINTF("two-byte shufti, depth %u\n", i); - *as = AccelScheme(CharReach::dot(), i); + *as = AccelScheme(); + as->offset = i; return true; } } diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index 16a6b770b..c5f4e4bc4 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -66,71 +66,10 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, #define DOUBLE_SHUFTI_LIMIT 20 struct AccelScheme { - AccelScheme(const CharReach &cr_in, u32 offset_in) - : 
cr(cr_in), offset(offset_in) { - assert(offset <= MAX_ACCEL_DEPTH); - } - AccelScheme() : cr(CharReach::dot()), offset(MAX_ACCEL_DEPTH + 1) {} - - bool operator<(const AccelScheme &b) const { - const AccelScheme &a = *this; - - // Don't use ORDER_CHECK as it will (stupidly) eval count() too many - // times. - size_t a_dcount = double_cr.count(); - size_t b_dcount = b.double_cr.count(); - - bool feasible_double_a = !a.double_byte.empty() - && a.double_byte.size() <= DOUBLE_SHUFTI_LIMIT; - bool feasible_double_b = !b.double_byte.empty() - && b.double_byte.size() <= DOUBLE_SHUFTI_LIMIT; - - if (feasible_double_a != feasible_double_b) { - return feasible_double_a > feasible_double_b; - } - - if (feasible_double_a) { - if (a_dcount != b_dcount) { - return a_dcount < b_dcount; - } - - if ((a.double_byte.size() == 1) != (b.double_byte.size() == 1)) { - return a.double_byte.size() < b.double_byte.size(); - } - - if (!a_dcount) { - bool cd_a = buildDvermMask(a.double_byte); - bool cd_b = buildDvermMask(b.double_byte); - if (cd_a != cd_b) { - return cd_a > cd_b; - } - } - ORDER_CHECK(double_byte.size()); - ORDER_CHECK(double_offset); - } - - const size_t a_count = cr.count(), b_count = b.cr.count(); - if (a_count != b_count) { - return a_count < b_count; - } - - /* TODO: give bonus if one is a 'caseless' character */ - ORDER_CHECK(offset); - ORDER_CHECK(cr); - ORDER_CHECK(double_byte); - ORDER_CHECK(double_cr); - ORDER_CHECK(double_offset); - return false; - } - - bool operator>(const AccelScheme &b) const { - return b < *this; - } - ue2::flat_set > double_byte; - CharReach cr; + CharReach cr = CharReach::dot(); CharReach double_cr; - u32 offset; + u32 offset = MAX_ACCEL_DEPTH + 1; u32 double_offset = 0; }; @@ -153,7 +92,8 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::map &br_cyclic, AccelScheme *as, bool allow_wide); -/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */ +/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). 
+ */ MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g, const std::vector &verts, const CompileContext &cc); From ff721ed8e4664e21924e70a14cd1d8bfe728f3d7 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 6 Apr 2016 16:43:32 +1000 Subject: [PATCH 144/218] unify some accel code/structures between limex and mcclellan --- CMakeLists.txt | 1 + src/nfa/goughcompile.cpp | 25 ++-- src/nfa/limex_accel.c | 205 +---------------------------- src/nfa/mcclellancompile.cpp | 76 +++++------ src/nfa/mcclellancompile.h | 18 +-- src/nfa/mcclellancompile_accel.cpp | 93 +++++++------ src/nfa/mcclellancompile_accel.h | 4 +- src/nfagraph/ng_limex_accel.h | 10 +- src/util/accel_scheme.h | 51 +++++++ 9 files changed, 156 insertions(+), 327 deletions(-) create mode 100644 src/util/accel_scheme.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e1bd27943..5d1d741ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -827,6 +827,7 @@ SET (hs_SRCS src/rose/rose_in_graph.h src/rose/rose_in_util.cpp src/rose/rose_in_util.h + src/util/accel_scheme.h src/util/alloc.cpp src/util/alloc.h src/util/bitfield.h diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 2ad3c6dd6..b75e0463e 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -85,9 +85,9 @@ class gough_build_strat : public mcclellan_build_strat { vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const override; - escape_info find_escape_strings(dstate_id_t this_idx) const override; + AccelScheme find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override { return sizeof(gough_accel); } - void buildAccel(dstate_id_t this_idx, const escape_info &info, + void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) override; u32 max_allowed_offset_accel() const override { return 0; } @@ -1146,31 +1146,32 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, return gough_dfa; } -escape_info gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { - escape_info rv; +AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { + AccelScheme rv; if (!contains(accel_gough_info, this_idx)) { - rv.outs = CharReach::dot(); - rv.outs2_broken = true; + rv.cr = CharReach::dot(); + rv.double_byte.clear(); return rv; } rv = mcclellan_build_strat::find_escape_strings(this_idx); - assert(!rv.offset); /* should have been limited by strat */ + assert(!rv.offset || rv.cr.all()); /* should have been limited by strat */ if (rv.offset) { - rv.outs = CharReach::dot(); - rv.outs2_broken = true; + rv.cr = CharReach::dot(); + rv.double_byte.clear(); return rv; } - if (!accel_gough_info.at(this_idx).two_byte) { - rv.outs2_broken = true; + if (rv.double_offset + || !accel_gough_info.at(this_idx).two_byte) { + rv.double_byte.clear(); } return rv; } -void gough_build_strat::buildAccel(dstate_id_t this_idx, const escape_info &info, +void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) { assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); gough_accel *accel = (gough_accel *)accel_out; diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index 0bfc9d85b..2c73f9ff0 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -49,209 +49,6 @@ #include "util/simd_utils_ssse3.h" #include "util/shuffle_ssse3.h" -static -const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) { - assert(ISALIGNED(aux)); // must be SIMD aligned for 
shufti - assert(end > ptr); - assert(end - ptr >= 16); // must be at least 16 bytes to scan - - const u8 *start = ptr; - u8 offset; - switch (aux->accel_type) { - case ACCEL_VERM: - DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c); - offset = aux->verm.offset; - ptr = vermicelliExec(aux->verm.c, 0, ptr, end); - break; - case ACCEL_VERM_NOCASE: - DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c); - offset = aux->verm.offset; - ptr = vermicelliExec(aux->verm.c, 1, ptr, end); - break; - case ACCEL_DVERM: - DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - offset = aux->dverm.offset; - ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end); - break; - case ACCEL_DVERM_NOCASE: - DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - offset = aux->dverm.offset; - ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, - 1, ptr, end); - break; - case ACCEL_DVERM_MASKED: - DEBUG_PRINTF("double vermicelli masked for " - "0x%02hhx%02hhx/0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2, - aux->dverm.m1, aux->dverm.m2); - offset = aux->dverm.offset; - ptr = vermicelliDoubleMaskedExec(aux->dverm.c1, aux->dverm.c2, - aux->dverm.m1, aux->dverm.m2, ptr, end); - break; - case ACCEL_MLVERM: - DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = long_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); - break; - case ACCEL_MLVERM_NOCASE: - DEBUG_PRINTF("long vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = long_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); - break; - case ACCEL_MLGVERM: - DEBUG_PRINTF("long grab vermicelli for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = longgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); - break; - case ACCEL_MLGVERM_NOCASE: - DEBUG_PRINTF("long grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = longgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); - break; - case ACCEL_MSVERM: - DEBUG_PRINTF("shift vermicelli for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = shift_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); - break; - case ACCEL_MSVERM_NOCASE: - DEBUG_PRINTF("shift vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = shift_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); - break; - case ACCEL_MSGVERM: - DEBUG_PRINTF("shift grab vermicelli for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = shiftgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); - break; - case ACCEL_MSGVERM_NOCASE: - DEBUG_PRINTF("shift grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = shiftgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); - break; - case ACCEL_MDSVERM: - DEBUG_PRINTF("double shift vermicelli for 0x%02hhx\n", aux->mdverm.c); - offset = aux->mdverm.offset; - ptr = doubleshift_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2); - break; - case ACCEL_MDSVERM_NOCASE: - DEBUG_PRINTF("double shift vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c); - offset = aux->mverm.offset; - ptr = doubleshift_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2); - break; - case ACCEL_MDSGVERM: - DEBUG_PRINTF("double shift grab vermicelli for 0x%02hhx\n", 
aux->mdverm.c); - offset = aux->mverm.offset; - ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2); - break; - case ACCEL_MDSGVERM_NOCASE: - DEBUG_PRINTF("double shift grab vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c); - offset = aux->mverm.offset; - ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2); - break; - case ACCEL_SHUFTI: - DEBUG_PRINTF("single shufti\n"); - offset = aux->shufti.offset; - ptr = shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end); - break; - case ACCEL_DSHUFTI: - DEBUG_PRINTF("double shufti\n"); - offset = aux->dshufti.offset; - ptr = shuftiDoubleExec(aux->dshufti.lo1, aux->dshufti.hi1, - aux->dshufti.lo2, aux->dshufti.hi2, ptr, end); - break; - case ACCEL_MLSHUFTI: - offset = aux->mshufti.offset; - ptr = long_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); - break; - case ACCEL_MLGSHUFTI: - offset = aux->mshufti.offset; - ptr = longgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); - break; - case ACCEL_MSSHUFTI: - offset = aux->mshufti.offset; - ptr = shift_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); - break; - case ACCEL_MSGSHUFTI: - offset = aux->mshufti.offset; - ptr = shiftgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); - break; - case ACCEL_MDSSHUFTI: - offset = aux->mdshufti.offset; - ptr = doubleshift_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi, ptr, end, - aux->mdshufti.len1, aux->mdshufti.len2); - break; - case ACCEL_MDSGSHUFTI: - offset = aux->mdshufti.offset; - ptr = doubleshiftgrab_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi, ptr, end, - aux->mdshufti.len1, aux->mdshufti.len2); - break; - case ACCEL_TRUFFLE: - DEBUG_PRINTF("truffle shuffle\n"); - offset = aux->truffle.offset; - ptr = truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end); - break; - case ACCEL_MLTRUFFLE: - DEBUG_PRINTF("long match truffle shuffle\n"); - offset = aux->mtruffle.offset; - ptr = long_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, - ptr, end, aux->mtruffle.len); - break; - case ACCEL_MLGTRUFFLE: - DEBUG_PRINTF("long grab match truffle shuffle\n"); - offset = aux->mtruffle.offset; - ptr = longgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, - ptr, end, aux->mtruffle.len); - break; - case ACCEL_MSTRUFFLE: - DEBUG_PRINTF("shift match truffle shuffle\n"); - offset = aux->mtruffle.offset; - ptr = shift_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, - ptr, end, aux->mtruffle.len); - break; - case ACCEL_MSGTRUFFLE: - DEBUG_PRINTF("shift grab match truffle shuffle\n"); - offset = aux->mtruffle.offset; - ptr = shiftgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, - ptr, end, aux->mtruffle.len); - break; - case ACCEL_MDSTRUFFLE: - DEBUG_PRINTF("double shift match truffle shuffle\n"); - offset = aux->mdtruffle.offset; - ptr = doubleshift_truffleExec(aux->mdtruffle.mask1, - aux->mdtruffle.mask2, ptr, end, - aux->mdtruffle.len1, - aux->mdtruffle.len2); - break; - case ACCEL_MDSGTRUFFLE: - DEBUG_PRINTF("double shift grab match truffle shuffle\n"); - offset = aux->mdtruffle.offset; - ptr = doubleshiftgrab_truffleExec(aux->mdtruffle.mask1, - aux->mdtruffle.mask2, ptr, end, - aux->mdtruffle.len1, - aux->mdtruffle.len2); - break; - case ACCEL_RED_TAPE: - ptr = end; /* there is no escape */ - offset = aux->generic.offset; - break; - default: - /* no acceleration, fall through and return current ptr */ - offset = 0; - break; - } - 
- if (offset) { - ptr -= offset; - if (ptr < start) { - return start; - } - } - - return ptr; -} - static really_inline size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, const u8 *input, u32 idx, size_t i, size_t end) { @@ -272,7 +69,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, } aux = aux + aux_idx; - const u8 *ptr = accelScan(aux, &input[i], &input[end]); + const u8 *ptr = run_accel(aux, &input[i], &input[end]); assert(ptr >= &input[i]); size_t j = (size_t)(ptr - input); DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i)); diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index b44187309..8bc0b9d86 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -128,10 +128,10 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) { } static -bool double_byte_ok(const escape_info &info) { - return !info.outs2_broken - && info.outs2_single.count() < info.outs2.size() - && info.outs2_single.count() <= 2 && !info.outs2.empty(); +bool double_byte_ok(const AccelScheme &info) { + return !info.double_byte.empty() + && info.double_cr.count() < info.double_byte.size() + && info.double_cr.count() <= 2 && !info.double_byte.empty(); } static @@ -189,7 +189,7 @@ u32 mcclellan_build_strat::max_allowed_offset_accel() const { return ACCEL_DFA_MAX_OFFSET_DEPTH; } -escape_info mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) +AccelScheme mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) const { return find_mcclellan_escape_info(rdfa, this_idx, max_allowed_offset_accel()); @@ -197,33 +197,33 @@ escape_info mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) /** builds acceleration schemes for states */ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, - const escape_info &info, + const AccelScheme &info, void *accel_out) { AccelAux *accel = (AccelAux *)accel_out; DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, - info.outs2_offset); + info.double_offset); accel->generic.offset = verify_u8(info.offset); - if (double_byte_ok(info) && info.outs2_single.none() - && info.outs2.size() == 1) { + if (double_byte_ok(info) && info.double_cr.none() + && info.double_byte.size() == 1) { accel->accel_type = ACCEL_DVERM; - accel->dverm.c1 = info.outs2.begin()->first; - accel->dverm.c2 = info.outs2.begin()->second; - accel->dverm.offset = verify_u8(info.outs2_offset); + accel->dverm.c1 = info.double_byte.begin()->first; + accel->dverm.c2 = info.double_byte.begin()->second; + accel->dverm.offset = verify_u8(info.double_offset); DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); return; } - if (double_byte_ok(info) && info.outs2_single.none() - && (info.outs2.size() == 2 || info.outs2.size() == 4)) { + if (double_byte_ok(info) && info.double_cr.none() + && (info.double_byte.size() == 2 || info.double_byte.size() == 4)) { bool ok = true; - assert(!info.outs2.empty()); - u8 firstC = info.outs2.begin()->first & CASE_CLEAR; - u8 secondC = info.outs2.begin()->second & CASE_CLEAR; + assert(!info.double_byte.empty()); + u8 firstC = info.double_byte.begin()->first & CASE_CLEAR; + u8 secondC = info.double_byte.begin()->second & CASE_CLEAR; - for (const pair &p : info.outs2) { + for (const pair &p : info.double_byte) { if ((p.first & CASE_CLEAR) != firstC || (p.second & CASE_CLEAR) != secondC) { ok = false; @@ -235,18 +235,18 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, accel->accel_type = ACCEL_DVERM_NOCASE; 
accel->dverm.c1 = firstC; accel->dverm.c2 = secondC; - accel->dverm.offset = verify_u8(info.outs2_offset); + accel->dverm.offset = verify_u8(info.double_offset); DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); return; } u8 m1; u8 m2; - if (buildDvermMask(info.outs2, &m1, &m2)) { + if (buildDvermMask(info.double_byte, &m1, &m2)) { accel->accel_type = ACCEL_DVERM_MASKED; - accel->dverm.offset = verify_u8(info.outs2_offset); - accel->dverm.c1 = info.outs2.begin()->first & m1; - accel->dverm.c2 = info.outs2.begin()->second & m2; + accel->dverm.offset = verify_u8(info.double_offset); + accel->dverm.c1 = info.double_byte.begin()->first & m1; + accel->dverm.c2 = info.double_byte.begin()->second & m2; accel->dverm.m1 = m1; accel->dverm.m2 = m2; DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", @@ -256,52 +256,52 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } if (double_byte_ok(info) - && shuftiBuildDoubleMasks(info.outs2_single, info.outs2, + && shuftiBuildDoubleMasks(info.double_cr, info.double_byte, &accel->dshufti.lo1, &accel->dshufti.hi1, &accel->dshufti.lo2, &accel->dshufti.hi2)) { accel->accel_type = ACCEL_DSHUFTI; - accel->dshufti.offset = verify_u8(info.outs2_offset); + accel->dshufti.offset = verify_u8(info.double_offset); DEBUG_PRINTF("state %hu is double shufti\n", this_idx); return; } - if (info.outs.none()) { + if (info.cr.none()) { accel->accel_type = ACCEL_RED_TAPE; DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape" " from which there is no escape\n", this_idx); return; } - if (info.outs.count() == 1) { + if (info.cr.count() == 1) { accel->accel_type = ACCEL_VERM; - accel->verm.c = info.outs.find_first(); + accel->verm.c = info.cr.find_first(); DEBUG_PRINTF("state %hu is vermicelli\n", this_idx); return; } - if (info.outs.count() == 2 && info.outs.isCaselessChar()) { + if (info.cr.count() == 2 && info.cr.isCaselessChar()) { accel->accel_type = ACCEL_VERM_NOCASE; - accel->verm.c = info.outs.find_first() & CASE_CLEAR; + accel->verm.c = info.cr.find_first() & CASE_CLEAR; DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx); return; } - if (info.outs.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) { + if (info.cr.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) { accel->accel_type = ACCEL_NONE; DEBUG_PRINTF("state %hu is too broad\n", this_idx); return; } accel->accel_type = ACCEL_SHUFTI; - if (-1 != shuftiBuildMasks(info.outs, &accel->shufti.lo, + if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) { DEBUG_PRINTF("state %hu is shufti\n", this_idx); return; } - assert(!info.outs.none()); + assert(!info.cr.none()); accel->accel_type = ACCEL_TRUFFLE; - truffleBuildMasks(info.outs, &accel->truffle.mask1, &accel->truffle.mask2); + truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2); DEBUG_PRINTF("state %hu is truffle\n", this_idx); } @@ -486,7 +486,7 @@ void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, } static -void fillAccelOut(const map &accel_escape_info, +void fillAccelOut(const map &accel_escape_info, set *accel_states) { for (dstate_id_t i : accel_escape_info | map_keys) { accel_states->insert(i); @@ -581,7 +581,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, unique_ptr ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - map accel_escape_info + map accel_escape_info = populateAccelerationInfo(info.raw, info.strat, cc.grey); size_t tran_size = (1 << info.getAlphaShift()) @@ -748,7 +748,7 @@ void 
fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table, static void allocateFSN8(dfa_info &info, - const map &accel_escape_info, + const map &accel_escape_info, u16 *accel_limit, u16 *accept_limit) { info.states[0].impl_id = 0; /* dead is always 0 */ @@ -801,7 +801,7 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, unique_ptr ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - map accel_escape_info + map accel_escape_info = populateAccelerationInfo(info.raw, info.strat, cc.grey); size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index 8dcc161bc..781e262da 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -31,6 +31,7 @@ #include "rdfa.h" #include "ue2common.h" +#include "util/accel_scheme.h" #include "util/alloc.h" #include "util/charreach.h" #include "util/ue2_containers.h" @@ -54,15 +55,6 @@ struct raw_report_info { std::vector &ro /* out */) const = 0; }; -struct escape_info { - CharReach outs; - CharReach outs2_single; - flat_set> outs2; - bool outs2_broken = false; - u32 offset = 0; - u32 outs2_offset = 0; -}; - class dfa_build_strat { public: virtual ~dfa_build_strat(); @@ -72,9 +64,9 @@ class dfa_build_strat { std::vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const = 0; - virtual escape_info find_escape_strings(dstate_id_t this_idx) const = 0; + virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const = 0; virtual size_t accelSize(void) const = 0; - virtual void buildAccel(dstate_id_t this_idx, const escape_info &info, + virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) = 0; }; @@ -87,9 +79,9 @@ class mcclellan_build_strat : public dfa_build_strat { std::vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const override; - escape_info find_escape_strings(dstate_id_t this_idx) const override; + AccelScheme find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override; - void buildAccel(dstate_id_t this_idx,const escape_info &info, + void buildAccel(dstate_id_t this_idx,const AccelScheme &info, void *accel_out) override; virtual u32 max_allowed_offset_accel() const; diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp index 3e73d31d4..432c035dd 100644 --- a/src/nfa/mcclellancompile_accel.cpp +++ b/src/nfa/mcclellancompile_accel.cpp @@ -178,25 +178,14 @@ vector > generate_paths(const raw_dfa &rdfa, dstate_id_t base, } static -escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, +AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, u32 max_allowed_accel_offset) { DEBUG_PRINTF("looking for accel for %hu\n", base); vector > paths = generate_paths(rdfa, base, max_allowed_accel_offset + 1); AccelScheme as = findBestAccelScheme(paths, CharReach(), true); - escape_info rv; - rv.offset = as.offset; - rv.outs = as.cr; - if (!as.double_byte.empty()) { - rv.outs2_single = as.double_cr; - rv.outs2 = as.double_byte; - rv.outs2_offset = as.double_offset; - rv.outs2_broken = false; - } else { - rv.outs2_broken = true; - } DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); - return rv; + return as; } static @@ -214,18 +203,18 @@ vector find_nonexit_symbols(const raw_dfa &rdfa, static set find_region(const raw_dfa &rdfa, dstate_id_t base, - const escape_info &ei) { + const AccelScheme 
&ei) { DEBUG_PRINTF("looking for region around %hu\n", base); set region = {base}; - if (!ei.outs2_broken) { + if (!ei.double_byte.empty()) { return region; } - DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.outs).c_str(), ei.offset); + DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset); - const CharReach &escape = ei.outs; + const CharReach &escape = ei.cr; auto nonexit_symbols = find_nonexit_symbols(rdfa, escape); vector pending = {base}; @@ -248,16 +237,16 @@ set find_region(const raw_dfa &rdfa, dstate_id_t base, } static -bool better(const escape_info &a, const escape_info &b) { - if (!a.outs2_broken && b.outs2_broken) { +bool better(const AccelScheme &a, const AccelScheme &b) { + if (!a.double_byte.empty() && b.double_byte.empty()) { return true; } - if (!b.outs2_broken) { + if (!b.double_byte.empty()) { return false; } - return a.outs.count() < b.outs.count(); + return a.cr.count() < b.cr.count(); } static @@ -271,10 +260,10 @@ vector reverse_alpha_remapping(const raw_dfa &rdfa) { return rv; } -map populateAccelerationInfo(const raw_dfa &rdfa, +map populateAccelerationInfo(const raw_dfa &rdfa, const dfa_build_strat &strat, const Grey &grey) { - map rv; + map rv; if (!grey.accelerateDFA) { return rv; } @@ -283,7 +272,7 @@ map populateAccelerationInfo(const raw_dfa &rdfa, DEBUG_PRINTF("sds %hu\n", sds_proxy); for (size_t i = 0; i < rdfa.states.size(); i++) { - escape_info ei = strat.find_escape_strings(i); + AccelScheme ei = strat.find_escape_strings(i); if (i == DEAD_STATE) { continue; @@ -301,25 +290,25 @@ map populateAccelerationInfo(const raw_dfa &rdfa, : ACCEL_DFA_MAX_STOP_CHAR; DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); - if (ei.outs.count() > single_limit) { + if (ei.cr.count() > single_limit) { DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, - ei.outs.count()); + ei.cr.count()); continue; } DEBUG_PRINTF("state %zu should be accelerable %zu\n", - i, ei.outs.count()); + i, ei.cr.count()); rv[i] = ei; } /* provide accleration states to states in the region of sds */ if (contains(rv, sds_proxy)) { - escape_info sds_ei = rv[sds_proxy]; - sds_ei.outs2_broken = true; /* region based on single byte scheme + AccelScheme sds_ei = rv[sds_proxy]; + sds_ei.double_byte.clear(); /* region based on single byte scheme * may differ from double byte */ DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n", - sds_ei.outs.count()); + sds_ei.cr.count()); auto sds_region = find_region(rdfa, sds_proxy, sds_ei); for (auto s : sds_region) { if (!contains(rv, s) || better(sds_ei, rv[s])) { @@ -332,18 +321,20 @@ map populateAccelerationInfo(const raw_dfa &rdfa, } static -bool double_byte_ok(const escape_info &info) { - return !info.outs2_broken - && info.outs2_single.count() < info.outs2.size() - && info.outs2_single.count() <= 2 && !info.outs2.empty(); +bool double_byte_ok(const AccelScheme &info) { + return !info.double_byte.empty() + && info.double_cr.count() < info.double_byte.size() + && info.double_cr.count() <= 2 && !info.double_byte.empty(); } -escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, - dstate_id_t this_idx, +AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx, u32 max_allowed_accel_offset) { - escape_info rv; + AccelScheme rv; + rv.cr.clear(); + rv.offset = 0; const dstate &raw = rdfa.states[this_idx]; const vector rev_map = reverse_alpha_remapping(rdfa); + bool outs2_broken = false; for (u32 i = 0; i < rev_map.size(); i++) { if (raw.next[i] == this_idx) { @@ -352,17 +343,17 
@@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, const CharReach &cr_i = rev_map.at(i); - rv.outs |= cr_i; + rv.cr |= cr_i; DEBUG_PRINTF("next is %hu\n", raw.next[i]); const dstate &raw_next = rdfa.states[raw.next[i]]; if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { DEBUG_PRINTF("leads to report\n"); - rv.outs2_broken = true; /* cannot accelerate over reports */ + outs2_broken = true; /* cannot accelerate over reports */ } - if (rv.outs2_broken) { + if (outs2_broken) { continue; } @@ -378,35 +369,39 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, } if (cr_i.count() * cr_all_j.count() > 8) { - DEBUG_PRINTF("adding sym %u to outs2_single\n", i); - rv.outs2_single |= cr_i; + DEBUG_PRINTF("adding sym %u to double_cr\n", i); + rv.double_cr |= cr_i; } else { for (auto ii = cr_i.find_first(); ii != CharReach::npos; ii = cr_i.find_next(ii)) { for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; jj = cr_all_j.find_next(jj)) { - rv.outs2.emplace((u8)ii, (u8)jj); + rv.double_byte.emplace((u8)ii, (u8)jj); } } } - if (rv.outs2.size() > 8) { + if (rv.double_byte.size() > 8) { DEBUG_PRINTF("outs2 too big\n"); - rv.outs2_broken = true; + outs2_broken = true; } } + if (outs2_broken) { + rv.double_byte.clear(); + } + DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); - DEBUG_PRINTF("broken %d\n", rv.outs2_broken); + DEBUG_PRINTF("broken %d\n", outs2_broken); if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) && this_idx == rdfa.start_floating && this_idx != DEAD_STATE) { DEBUG_PRINTF("looking for offset accel at %u\n", this_idx); auto offset = look_for_offset_accel(rdfa, this_idx, max_allowed_accel_offset); - DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(), - rv.outs.count()); - if (double_byte_ok(offset) || offset.outs.count() < rv.outs.count()) { + DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), + rv.cr.count()); + if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) { DEBUG_PRINTF("using offset accel\n"); rv = offset; } diff --git a/src/nfa/mcclellancompile_accel.h b/src/nfa/mcclellancompile_accel.h index aa1672b00..427267d77 100644 --- a/src/nfa/mcclellancompile_accel.h +++ b/src/nfa/mcclellancompile_accel.h @@ -48,11 +48,11 @@ struct Grey; * than normal states as accelerating sds is important. 
Matches NFA value */ #define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 -std::map populateAccelerationInfo(const raw_dfa &rdfa, +std::map populateAccelerationInfo(const raw_dfa &rdfa, const dfa_build_strat &strat, const Grey &grey); -escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, +AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx, u32 max_allowed_accel_offset); diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index c5f4e4bc4..cb3d12104 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -37,6 +37,7 @@ #include "ng_misc_opt.h" #include "ue2common.h" #include "nfa/accelcompile.h" +#include "util/accel_scheme.h" #include "util/charreach.h" #include "util/order_check.h" #include "util/ue2_containers.h" @@ -47,7 +48,6 @@ namespace ue2 { /* compile time accel defs */ -#define MAX_ACCEL_DEPTH 4 #define MAX_MERGED_ACCEL_STOPS 200 #define ACCEL_MAX_STOP_CHAR 24 #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ @@ -65,14 +65,6 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, #define DOUBLE_SHUFTI_LIMIT 20 -struct AccelScheme { - ue2::flat_set > double_byte; - CharReach cr = CharReach::dot(); - CharReach double_cr; - u32 offset = MAX_ACCEL_DEPTH + 1; - u32 double_offset = 0; -}; - NFAVertex get_sds_or_proxy(const NGHolder &g); AccelScheme nfaFindAccel(const NGHolder &g, const std::vector &verts, diff --git a/src/util/accel_scheme.h b/src/util/accel_scheme.h new file mode 100644 index 000000000..f524fe937 --- /dev/null +++ b/src/util/accel_scheme.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ACCEL_SCHEME_H +#define ACCEL_SCHEME_H + +#include "util/charreach.h" +#include "util/ue2_containers.h" + +#include + +namespace ue2 { + +#define MAX_ACCEL_DEPTH 4 + +struct AccelScheme { + flat_set > double_byte; + CharReach cr = CharReach::dot(); + CharReach double_cr; + u32 offset = MAX_ACCEL_DEPTH + 1; + u32 double_offset = 0; +}; + +} + +#endif From 938ac9fd38e4122244bfb945f1a04e821f5c7383 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 11 Apr 2016 13:47:10 +1000 Subject: [PATCH 145/218] CharReach operators inline --- src/util/charreach.cpp | 40 +--------------------------------------- src/util/charreach.h | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 46 deletions(-) diff --git a/src/util/charreach.cpp b/src/util/charreach.cpp index a231bbb05..9116b719d 100644 --- a/src/util/charreach.cpp +++ b/src/util/charreach.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -50,44 +50,6 @@ void CharReach::set(const std::string &s) { } } -/// Bitwise OR. -CharReach CharReach::operator|(const CharReach &a) const { - CharReach cr(*this); - cr.bits |= a.bits; - return cr; -} - -/// Bitwise OR-equals. -void CharReach::operator|=(const CharReach &a) { - bits |= a.bits; -} - -/// Bitwise AND. -CharReach CharReach::operator&(const CharReach &a) const { - CharReach cr(*this); - cr.bits &= a.bits; - return cr; -} - -/// Bitwise AND-equals. -void CharReach::operator&=(const CharReach &a) { - bits &= a.bits; -} - -/// Bitwise complement. -CharReach CharReach::operator~(void) const { - CharReach cr(*this); - cr.flip(); - return cr; -} - -/// Bitwise XOR. -CharReach CharReach::operator^(const CharReach &a) const { - CharReach cr(*this); - cr.bits ^= a.bits; - return cr; -} - /// Do we only contain bits representing alpha characters? bool CharReach::isAlpha() const { if (none()) { diff --git a/src/util/charreach.h b/src/util/charreach.h index 64bd969e5..53f2a5d27 100644 --- a/src/util/charreach.h +++ b/src/util/charreach.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -135,22 +135,38 @@ class CharReach { size_t find_nth(size_t n) const { return bits.find_nth(n); } /// Bitwise OR. - CharReach operator|(const CharReach &a) const; + CharReach operator|(const CharReach &a) const { + CharReach cr(*this); + cr.bits |= a.bits; + return cr; + } /// Bitwise OR-equals. - void operator|=(const CharReach &a); + void operator|=(const CharReach &a) { bits |= a.bits; } /// Bitwise AND. - CharReach operator&(const CharReach &a) const; + CharReach operator&(const CharReach &a) const { + CharReach cr(*this); + cr.bits &= a.bits; + return cr; + } /// Bitwise AND-equals. - void operator&=(const CharReach &a); + void operator&=(const CharReach &a) { bits &= a.bits; } /// Bitwise XOR. - CharReach operator^(const CharReach &a) const; + CharReach operator^(const CharReach &a) const { + CharReach cr(*this); + cr.bits ^= a.bits; + return cr; + } /// Bitwise complement. - CharReach operator~(void) const; + CharReach operator~(void) const { + CharReach cr(*this); + cr.flip(); + return cr; + } /// Do we only contain bits representing alpha characters? 
bool isAlpha() const; From 41751c4f3b2b714c751ffe7b77773fb2bf346375 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 12 Apr 2016 09:52:53 +1000 Subject: [PATCH 146/218] bitfield: unroll main operators --- src/util/bitfield.h | 69 +++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/src/util/bitfield.h b/src/util/bitfield.h index 208c2ef51..a71c1f88d 100644 --- a/src/util/bitfield.h +++ b/src/util/bitfield.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -187,8 +187,15 @@ class bitfield { size_t count() const { static_assert(block_size == 64, "adjust popcount for block_type"); size_t sum = 0; - for (const auto &e : bits) { - sum += popcount64(e); + size_t i = 0; + for (; i + 4 <= num_blocks; i += 4) { + sum += popcount64(bits[i]); + sum += popcount64(bits[i + 1]); + sum += popcount64(bits[i + 2]); + sum += popcount64(bits[i + 3]); + } + for (; i < num_blocks; i++) { + sum += popcount64(bits[i]); } assert(sum <= size()); return sum; @@ -298,49 +305,61 @@ class bitfield { } /// Bitwise OR. - bitfield operator|(const bitfield &a) const { - bitfield cr; - for (size_t i = 0; i < bits.size(); i++) { - cr.bits[i] = bits[i] | a.bits[i]; - } - return cr; + bitfield operator|(bitfield a) const { + a |= *this; + return a; } /// Bitwise OR-equals. void operator|=(const bitfield &a) { - for (size_t i = 0; i < bits.size(); i++) { + size_t i = 0; + for (; i + 4 <= num_blocks; i += 4) { + bits[i] |= a.bits[i]; + bits[i + 1] |= a.bits[i + 1]; + bits[i + 2] |= a.bits[i + 2]; + bits[i + 3] |= a.bits[i + 3]; + } + for (; i < num_blocks; i++) { bits[i] |= a.bits[i]; } } /// Bitwise AND. - bitfield operator&(const bitfield &a) const { - bitfield cr; - for (size_t i = 0; i < bits.size(); i++) { - cr.bits[i] = bits[i] & a.bits[i]; - } - return cr; + bitfield operator&(bitfield a) const { + a &= *this; + return a; } /// Bitwise AND-equals. void operator&=(const bitfield &a) { - for (size_t i = 0; i < bits.size(); i++) { + size_t i = 0; + for (; i + 4 <= num_blocks; i += 4) { + bits[i] &= a.bits[i]; + bits[i + 1] &= a.bits[i + 1]; + bits[i + 2] &= a.bits[i + 2]; + bits[i + 3] &= a.bits[i + 3]; + } + for (; i < num_blocks; i++) { bits[i] &= a.bits[i]; } } /// Bitwise XOR. - bitfield operator^(const bitfield &a) const { - bitfield cr; - for (size_t i = 0; i < bits.size(); i++) { - cr.bits[i] = bits[i] ^ a.bits[i]; - } - return cr; + bitfield operator^(bitfield a) const { + a ^= *this; + return a; } /// Bitwise XOR-equals. 
- void operator^=(const bitfield &a) { - for (size_t i = 0; i < bits.size(); i++) { + void operator^=(bitfield a) { + size_t i = 0; + for (; i + 4 <= num_blocks; i += 4) { + bits[i] ^= a.bits[i]; + bits[i + 1] ^= a.bits[i + 1]; + bits[i + 2] ^= a.bits[i + 2]; + bits[i + 3] ^= a.bits[i + 3]; + } + for (; i < num_blocks; i++) { bits[i] ^= a.bits[i]; } } From 972126214978c27c37799b1686f45967bfa4621c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 13 Apr 2016 10:19:39 +1000 Subject: [PATCH 147/218] shufti: small C++11 fix to silence clang warning --- src/nfa/shufticompile.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/nfa/shufticompile.cpp b/src/nfa/shufticompile.cpp index f909a0b8c..217fcee03 100644 --- a/src/nfa/shufticompile.cpp +++ b/src/nfa/shufticompile.cpp @@ -155,15 +155,17 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar, u16 a_hi = 1U << (p.first >> 4); u16 b_lo = 1U << (p.second & 0xf); u16 b_hi = 1U << (p.second >> 4); - nibble_masks.push_back({a_lo, a_hi, b_lo, b_hi}); + nibble_masks.push_back({{a_lo, a_hi, b_lo, b_hi}}); } // one-byte literals (second byte is a wildcard) for (size_t it = onechar.find_first(); it != CharReach::npos; it = onechar.find_next(it)) { DEBUG_PRINTF("%02hhx\n", (u8)it); - nibble_masks.push_back({(u16)(1U << (it & 0xf)), (u16)(1U << (it >> 4)), - 0xffff, 0xffff}); + u16 a_lo = 1U << (it & 0xf); + u16 a_hi = 1U << (it >> 4); + u16 wildcard = 0xffff; + nibble_masks.push_back({{a_lo, a_hi, wildcard, wildcard}}); } // try to merge strings into shared buckets From 68851742cc95071ea1d966727b4bdf27d9567528 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 13 Apr 2016 12:00:36 +1000 Subject: [PATCH 148/218] Help ICC perform a not m128 --- src/nfa/accel_dump.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 40abd12c3..6e2b8f419 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -41,6 +41,7 @@ #include "util/charreach.h" #include "util/dump_charclass.h" #include "util/dump_mask.h" +#include "util/simd_utils.h" #include #include @@ -170,8 +171,8 @@ vector shufti2cr_array(const m128 lo_in, const m128 hi_in) { static void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1, const m128 &lo2, const m128 &hi2) { - vector cr1 = shufti2cr_array(~lo1, ~hi1); - vector cr2 = shufti2cr_array(~lo2, ~hi2); + vector cr1 = shufti2cr_array(not128(lo1), not128(hi1)); + vector cr2 = shufti2cr_array(not128(lo2), not128(hi2)); map > cr1_group; assert(cr1.size() == 8 && cr2.size() == 8); for (u32 i = 0; i < 8; i++) { From 36150bbc191f3c884bd23b4d662bffbd76429584 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 22 Mar 2016 16:45:09 +1100 Subject: [PATCH 149/218] Rose: replace internal_report with program Replace the use of the internal_report structure (for reports from engines, MPV etc) with the Rose program interpreter. SOM processing was reworked to use a new som_operation structure that is embedded in the appropriate instructions. 
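To make the new shape concrete: instead of fetching an internal_report and
branching on its type at match time, each external report ID now selects a
small instruction program that the interpreter walks when a match is
reported (see roseNfaRunProgram() in the catchup.c hunk below, which fetches
programs[id] via the new reportProgramOffset/reportProgramCount fields).
The following toy sketch illustrates the pattern only; it is not Hyperscan's
actual instruction set or encoding (the real opcodes, ROSE_INSTR_*, live in
rose_program.h and the dispatch loop in program_runtime.h), and all names in
it are invented for the example:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-ins for the real ROSE_INSTR_* opcodes. */
    enum toy_op {
        TOY_CHECK_BOUNDS, /* fail the program if the match is out of range */
        TOY_SOM_ADJUST,   /* derive a start-of-match, cf. som_operation */
        TOY_REPORT,       /* deliver the match to the user callback */
        TOY_END
    };

    struct toy_instr {
        enum toy_op op;
        uint64_t arg1, arg2; /* interpretation depends on op */
    };

    /* Walk one report program for a match ending at 'offset'. Returns 0 if
     * an instruction squashed the match, 1 if the program ran to the end. */
    static int toy_run_program(const struct toy_instr *pc, uint64_t offset) {
        uint64_t som = 0;
        for (;; pc++) {
            switch (pc->op) {
            case TOY_CHECK_BOUNDS:
                if (offset < pc->arg1 || offset > pc->arg2) {
                    return 0; /* bounds check failed: squash the match */
                }
                break;
            case TOY_SOM_ADJUST:
                som = offset >= pc->arg1 ? offset - pc->arg1 : 0;
                break;
            case TOY_REPORT:
                printf("report %llu matched at [%llu,%llu]\n",
                       (unsigned long long)pc->arg1,
                       (unsigned long long)som,
                       (unsigned long long)offset);
                break;
            case TOY_END:
                return 1;
            }
        }
    }

    int main(void) {
        const struct toy_instr prog[] = {
            { TOY_CHECK_BOUNDS, 0, 100 },
            { TOY_SOM_ADJUST, 5, 0 },
            { TOY_REPORT, 42, 0 },
            { TOY_END, 0, 0 },
        };
        toy_run_program(prog, 10); /* prints: report 42 matched at [5,10] */
        return 0;
    }

Folding the old internal_report type switch into per-report programs is what
lets the SOM rework below embed a som_operation directly in the relevant
instructions rather than threading report structures through every callback.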
--- CMakeLists.txt | 4 +- src/nfagraph/ng.cpp | 1 + src/nfagraph/ng_dump.cpp | 92 +++++---- src/nfagraph/ng_puff.cpp | 2 +- src/nfagraph/ng_som.cpp | 43 +++- src/nfagraph/ng_som.h | 2 + src/report.h | 335 ++----------------------------- src/rose/block.c | 9 +- src/rose/catchup.c | 257 +++++------------------- src/rose/catchup.h | 3 - src/rose/eod.c | 62 +++--- src/rose/match.c | 101 +++++++--- src/rose/match.h | 13 +- src/rose/program_runtime.h | 174 +++++++++------- src/rose/rose.h | 17 +- src/rose/rose_build_bytecode.cpp | 273 +++++++++++++++++++------ src/rose/rose_build_merge.cpp | 2 +- src/rose/rose_dump.cpp | 111 ++++++---- src/rose/rose_internal.h | 11 +- src/rose/rose_program.h | 49 +++-- src/rose/runtime.h | 13 +- src/rose/stream.c | 5 +- src/runtime.c | 140 ++----------- src/scratch.h | 2 - src/som/som_operation.h | 84 ++++++++ src/som/som_runtime.c | 80 ++++---- src/som/som_runtime.h | 12 +- src/util/exhaust.h | 4 +- src/util/internal_report.h | 201 ------------------- src/util/report.cpp | 69 ------- src/util/report.h | 58 +++--- 31 files changed, 875 insertions(+), 1354 deletions(-) create mode 100644 src/som/som_operation.h delete mode 100644 src/util/internal_report.h delete mode 100644 src/util/report.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d1d741ed..a4973c026 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -479,6 +479,7 @@ set (hs_exec_SRCS src/nfa/vermicelli_run.h src/nfa/vermicelli_sse.h src/som/som.h + src/som/som_operation.h src/som/som_runtime.h src/som/som_runtime.c src/som/som_stream.c @@ -795,6 +796,7 @@ SET (hs_SRCS src/som/slot_manager.h src/som/slot_manager_internal.h src/som/som.h + src/som/som_operation.h src/rose/rose_build.h src/rose/rose_build_add.cpp src/rose/rose_build_add_internal.h @@ -849,7 +851,6 @@ SET (hs_SRCS src/util/dump_mask.cpp src/util/dump_mask.h src/util/graph.h - src/util/internal_report.h src/util/multibit_build.cpp src/util/multibit_build.h src/util/order_check.h @@ -857,7 +858,6 @@ SET (hs_SRCS src/util/partitioned_set.h src/util/popcount.h src/util/queue_index_factory.h - src/util/report.cpp src/util/report.h src/util/report_manager.cpp src/util/report_manager.h diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index bc3aea381..b4b34d741 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -130,6 +130,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w, assert(g.kind == NFA_OUTFIX); dumpComponent(g, "haig", w.expressionIndex, comp_id, ng.cc.grey); + makeReportsSomPass(ng.rm, g); auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers, ng.cc.grey); if (haig) { diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp index 175082f87..60122cf3f 100644 --- a/src/nfagraph/ng_dump.cpp +++ b/src/nfagraph/ng_dump.cpp @@ -358,35 +358,38 @@ void dumpSmallWrite(const RoseEngine *rose, const Grey &grey) { smwrDumpNFA(smwr, false, grey.dumpPath); } -static UNUSED -const char *irTypeToString(u8 type) { -#define IR_TYPE_CASE(x) case x: return #x +static +const char *reportTypeToString(ReportType type) { +#define REPORT_TYPE_CASE(x) case x: return #x switch (type) { - IR_TYPE_CASE(EXTERNAL_CALLBACK); - IR_TYPE_CASE(EXTERNAL_CALLBACK_SOM_REL); - IR_TYPE_CASE(INTERNAL_SOM_LOC_SET); - IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_IF_UNSET); - IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_IF_WRITABLE); - IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_SOM_REV_NFA); - IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET); - IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE); - IR_TYPE_CASE(INTERNAL_SOM_LOC_COPY); - 
IR_TYPE_CASE(INTERNAL_SOM_LOC_COPY_IF_WRITABLE); - IR_TYPE_CASE(INTERNAL_SOM_LOC_MAKE_WRITABLE); - IR_TYPE_CASE(EXTERNAL_CALLBACK_SOM_STORED); - IR_TYPE_CASE(EXTERNAL_CALLBACK_SOM_ABS); - IR_TYPE_CASE(EXTERNAL_CALLBACK_SOM_REV_NFA); - IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_FROM); - IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE); - IR_TYPE_CASE(INTERNAL_ROSE_CHAIN); - default: return ""; + REPORT_TYPE_CASE(EXTERNAL_CALLBACK); + REPORT_TYPE_CASE(EXTERNAL_CALLBACK_SOM_REL); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_SET); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_SET_IF_UNSET); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_SET_IF_WRITABLE); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_SET_SOM_REV_NFA); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_COPY); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_COPY_IF_WRITABLE); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_MAKE_WRITABLE); + REPORT_TYPE_CASE(EXTERNAL_CALLBACK_SOM_STORED); + REPORT_TYPE_CASE(EXTERNAL_CALLBACK_SOM_ABS); + REPORT_TYPE_CASE(EXTERNAL_CALLBACK_SOM_REV_NFA); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_SET_FROM); + REPORT_TYPE_CASE(INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE); + REPORT_TYPE_CASE(INTERNAL_ROSE_CHAIN); + REPORT_TYPE_CASE(EXTERNAL_CALLBACK_SOM_PASS); } -#undef IR_TYPE_CASE +#undef REPORT_TYPE_CASE + + assert(0); + return ""; } -static really_inline -int isReverseNfaReport(const Report &ri) { - switch (ri.type) { +static +int isReverseNfaReport(const Report &report) { + switch (report.type) { case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: @@ -398,9 +401,9 @@ int isReverseNfaReport(const Report &ri) { return 0; } -static really_inline -int isSomRelSetReport(const Report &ri) { - switch (ri.type) { +static +int isSomRelSetReport(const Report &report) { + switch (report.type) { case INTERNAL_SOM_LOC_SET: case INTERNAL_SOM_LOC_SET_IF_UNSET: case INTERNAL_SOM_LOC_SET_IF_WRITABLE: @@ -420,31 +423,34 @@ void dumpReportManager(const ReportManager &rm, const Grey &grey) { ss << grey.dumpPath << "internal_reports.txt"; FILE *f = fopen(ss.str().c_str(), "w"); const vector &reports = rm.reports(); - for (u32 i = 0; i < reports.size(); i++) { - const Report &ir = reports[i]; - fprintf(f, "int %u: %s onmatch: %u", i, irTypeToString(ir.type), - ir.onmatch); + for (size_t i = 0; i < reports.size(); i++) { + const Report &report = reports[i]; + fprintf(f, "%zu: %s onmatch: %u", i, reportTypeToString(report.type), + report.onmatch); - u32 dkey = rm.getDkey(ir); + u32 dkey = rm.getDkey(report); if (dkey != MO_INVALID_IDX) { fprintf(f, " dkey %u", dkey); } - if (ir.ekey != MO_INVALID_IDX) { - fprintf(f, " ekey %u", ir.ekey); + if (report.ekey != INVALID_EKEY) { + fprintf(f, " ekey %u", report.ekey); } - if (ir.hasBounds()) { + if (report.hasBounds()) { fprintf(f, " hasBounds (minOffset=%llu, maxOffset=%llu, " "minLength=%llu)", - ir.minOffset, ir.maxOffset, ir.minLength); + report.minOffset, report.maxOffset, report.minLength); + } + if (report.quashSom) { + fprintf(f, " quashSom"); } - if (ir.offsetAdjust != 0) { - fprintf(f, " offsetAdjust: %d", ir.offsetAdjust); + if (report.offsetAdjust != 0) { + fprintf(f, " offsetAdjust: %d", report.offsetAdjust); } - if (isReverseNfaReport(ir)) { - fprintf(f, " reverse nfa: %u", ir.revNfaIndex); + if (isReverseNfaReport(report)) { + fprintf(f, " reverse nfa: %u", report.revNfaIndex); } - if (isSomRelSetReport(ir)) { - fprintf(f, " set, adjust: %lld", 
ir.somDistance); + if (isSomRelSetReport(report)) { + fprintf(f, " set, adjust: %lld", report.somDistance); } fprintf(f, "\n"); } diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index c8b6843d7..540f48592 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -293,7 +293,7 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, u32 squashDistance = allowedSquashDistance(cr, width, g, puffv, prefilter); - Report ir = makeRoseTrigger(event, squashDistance); + Report ir = makeMpvTrigger(event, squashDistance); /* only need to trigger once if floatingUnboundedDot */ bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth; if (floatingUnboundedDot) { diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index f26b62aa6..03a612a04 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -445,8 +445,9 @@ void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) { } static -void setPrefixReports(ReportManager &rm, NGHolder &g, u8 ir_type, u32 som_loc, - const vector &depths, bool prefix_by_rev) { +void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type, + u32 som_loc, const vector &depths, + bool prefix_by_rev) { Report ir = makeCallback(0U, 0); ir.type = ir_type; ir.onmatch = som_loc; @@ -470,7 +471,7 @@ void setPrefixReports(ReportManager &rm, NGHolder &g, u8 ir_type, u32 som_loc, } static -void updatePrefixReports(ReportManager &rm, NGHolder &g, u8 ir_type) { +void updatePrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type) { /* update the som action on the prefix report */ for (auto v : inv_adjacent_vertices_range(g.accept, g)) { auto &reports = g[v].reports; @@ -555,7 +556,8 @@ bool finalRegion(const NGHolder &g, static void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g, - NFAVertex v, u8 ir_type, u64a param) { + NFAVertex v, ReportType ir_type, + u64a param) { assert(!g[v].reports.empty()); flat_set r_new; @@ -2409,6 +2411,33 @@ bool splitOffBestLiteral(const NGHolder &g, return true; } +/** + * Replace the given graph's EXTERNAL_CALLBACK reports with + * EXTERNAL_CALLBACK_SOM_PASS reports. 
+ */ +void makeReportsSomPass(ReportManager &rm, NGHolder &g) { + for (const auto &v : vertices_range(g)) { + const auto &reports = g[v].reports; + if (reports.empty()) { + continue; + } + + flat_set new_reports; + for (const ReportID &id : reports) { + const Report &report = rm.getReport(id); + if (report.type != EXTERNAL_CALLBACK) { + new_reports.insert(id); + continue; + } + Report report2 = report; + report2.type = EXTERNAL_CALLBACK_SOM_PASS; + new_reports.insert(rm.getInternalId(report2)); + } + + g[v].reports = new_reports; + } +} + static bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { ue2_literal lit; @@ -2431,6 +2460,8 @@ bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); + makeReportsSomPass(ng.rm, *rhs); + dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey); vector > triggers; @@ -2493,6 +2524,8 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, return false; /* TODO: handle */ } + makeReportsSomPass(ng.rm, *rhs); + dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey); dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey); @@ -2628,6 +2661,8 @@ bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) { return false; } + makeReportsSomPass(ng.rm, *rhs); + dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey); vector> triggers; diff --git a/src/nfagraph/ng_som.h b/src/nfagraph/ng_som.h index 0ae2f17b9..707109454 100644 --- a/src/nfagraph/ng_som.h +++ b/src/nfagraph/ng_som.h @@ -72,6 +72,8 @@ sombe_rv doSom(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, som_type som); +void makeReportsSomPass(ReportManager &rm, NGHolder &g); + } // namespace ue2 #endif // NG_SOM_H diff --git a/src/report.h b/src/report.h index 28560907c..861abb37e 100644 --- a/src/report.h +++ b/src/report.h @@ -43,7 +43,6 @@ #include "som/som_runtime.h" #include "util/exhaust.h" #include "util/fatbit.h" -#include "util/internal_report.h" static really_inline int satisfiesMinLength(u64a min_len, u64a from_offset, @@ -68,14 +67,12 @@ enum DedupeResult { static really_inline enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, - const struct internal_report *ir, struct hs_scratch *scratch, u64a offset, - u64a from_offset, u64a to_offset, - const char do_som) { + u64a from_offset, u64a to_offset, u32 dkey, + s32 offset_adjust, char is_external_report, + char quash_som, const char do_som) { DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u, do_som=%d\n", offset, - from_offset, to_offset, ir->dkey, do_som); - DEBUG_PRINTF("report type=%u, quashSom=%d\n", ir->type, ir->quashSom); - const u32 dkey = ir->dkey; + from_offset, to_offset, dkey, do_som); // We should not have been called if there's no dedupe work to do. assert(do_som || dkey != MO_INVALID_IDX); @@ -99,10 +96,9 @@ enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, if (dkey != MO_INVALID_IDX) { const u32 dkeyCount = rose->dkeyCount; - const s32 offset_adj = ir->offsetAdjust; - if (ir->type == EXTERNAL_CALLBACK || ir->quashSom) { + if (is_external_report || quash_som) { DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(offset_adj == 0 || offset_adj == -1); + assert(offset_adjust == 0 || offset_adjust == -1); if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) { /* we have already raised this report at this offset, squash * dupe match. 
*/ @@ -112,7 +108,7 @@ enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, } else if (do_som) { /* SOM external event */ DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(offset_adj == 0 || offset_adj == -1); + assert(offset_adjust == 0 || offset_adjust == -1); u64a *starts = deduper->som_start_log[to_offset % 2]; if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) { starts[dkey] = MIN(starts[dkey], from_offset); @@ -121,7 +117,7 @@ enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, } DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]); - if (offset_adj) { + if (offset_adjust) { deduper->som_log_dirty |= 1; } else { deduper->som_log_dirty |= 2; @@ -134,176 +130,6 @@ enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, return DEDUPE_CONTINUE; } -static really_inline -enum DedupeResult dedupeCatchupSom(const struct RoseEngine *rose, - const struct internal_report *ir, - struct hs_scratch *scratch, u64a offset, - u64a from_offset, u64a to_offset) { - DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u\n", offset, - from_offset, to_offset, ir->dkey); - DEBUG_PRINTF("report type=%u, quashSom=%d\n", ir->type, ir->quashSom); - - struct match_deduper *deduper = &scratch->deduper; - if (offset != deduper->current_report_offset) { - assert(deduper->current_report_offset == ~0ULL || - deduper->current_report_offset < offset); - if (offset == deduper->current_report_offset + 1) { - fatbit_clear(deduper->log[offset % 2]); - } else { - fatbit_clear(deduper->log[0]); - fatbit_clear(deduper->log[1]); - } - - if (flushStoredSomMatches(scratch, offset)) { - return DEDUPE_HALT; - } - deduper->current_report_offset = offset; - } - - const u32 dkey = ir->dkey; - if (dkey != MO_INVALID_IDX) { - const u32 dkeyCount = rose->dkeyCount; - const s32 offset_adj = ir->offsetAdjust; - if (ir->quashSom) { - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(offset_adj == 0 || offset_adj == -1); - if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) { - /* we have already raised this report at this offset, squash - * dupe match. */ - DEBUG_PRINTF("dedupe\n"); - return DEDUPE_SKIP; - } - } else { - /* SOM external event */ - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(offset_adj == 0 || offset_adj == -1); - u64a *starts = deduper->som_start_log[to_offset % 2]; - if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) { - starts[dkey] = MIN(starts[dkey], from_offset); - } else { - starts[dkey] = from_offset; - } - DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]); - - if (offset_adj) { - deduper->som_log_dirty |= 1; - } else { - deduper->som_log_dirty |= 2; - } - - return DEDUPE_SKIP; - } - } - - return DEDUPE_CONTINUE; -} - -static really_inline -int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, - char is_simple, char do_som) { - assert(id != MO_INVALID_IDX); // Should never get an invalid ID. 
- assert(scratch); - assert(scratch->magic == SCRATCH_MAGIC); - - struct core_info *ci = &scratch->core_info; - const struct RoseEngine *rose = ci->rose; - DEBUG_PRINTF("internal report %u\n", id); - const struct internal_report *ir = getInternalReport(rose, id); - - assert(isExternalReport(ir)); /* only external reports should reach here */ - - s32 offset_adj = ir->offsetAdjust; - u64a to_offset = offset; - u64a from_offset = 0; - - u32 flags = 0; -#ifndef RELEASE_BUILD - if (offset_adj) { - // alert testing tools that we've got adjusted matches - flags |= HS_MATCH_FLAG_ADJUSTED; - } -#endif - - DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", offset, id, ir->type, ir->onmatch, - offset_adj); - - if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */ - DEBUG_PRINTF("pre broken - halting\n"); - return MO_HALT_MATCHING; - } - - if (!is_simple && ir->hasBounds) { - assert(ir->minOffset || ir->minLength || ir->maxOffset < MAX_OFFSET); - assert(ir->minOffset <= ir->maxOffset); - if (offset < ir->minOffset || offset > ir->maxOffset) { - DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", - offset, ir->minOffset, ir->maxOffset); - return ROSE_CONTINUE_MATCHING_NO_EXHAUST; - } - } - - if (!is_simple && ir->ekey != INVALID_EKEY && - unlikely(isExhausted(ci->rose, ci->exhaustionVector, ir->ekey))) { - DEBUG_PRINTF("ate exhausted match\n"); - return MO_CONTINUE_MATCHING; - } - - if (ir->type == EXTERNAL_CALLBACK) { - from_offset = 0; - } else if (do_som) { - from_offset = handleSomExternal(scratch, ir, to_offset); - } - - to_offset += offset_adj; - assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - - if (do_som && ir->minLength) { - if (!satisfiesMinLength(ir->minLength, from_offset, to_offset)) { - return ROSE_CONTINUE_MATCHING_NO_EXHAUST; - } - if (ir->quashSom) { - from_offset = 0; - } - } - - DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ir->onmatch, ci->userContext); - - int halt = 0; - - if (do_som || ir->dkey != MO_INVALID_IDX) { - enum DedupeResult dedupe_rv = dedupeCatchup(rose, ir, scratch, offset, - from_offset, to_offset, do_som); - switch (dedupe_rv) { - case DEDUPE_HALT: - halt = 1; - goto exit; - case DEDUPE_SKIP: - halt = 0; - goto exit; - case DEDUPE_CONTINUE: - break; - } - } - - halt = ci->userCallback((unsigned int)ir->onmatch, from_offset, to_offset, - flags, ci->userContext); -exit: - if (halt) { - DEBUG_PRINTF("callback requested to terminate matches\n"); - ci->status |= STATUS_TERMINATED; - return MO_HALT_MATCHING; - } - - if (!is_simple && ir->ekey != INVALID_EKEY) { - markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); - return MO_CONTINUE_MATCHING; - } else { - return ROSE_CONTINUE_MATCHING_NO_EXHAUST; - } -} - /** * \brief Deliver the given report to the user callback. * @@ -311,8 +137,8 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, * that dedupe catchup has been done. */ static really_inline -int roseDeliverReport(u64a offset, UNUSED ReportID id, ReportID onmatch, - s32 offset_adjust, struct hs_scratch *scratch, u32 ekey) { +int roseDeliverReport(u64a offset, ReportID onmatch, s32 offset_adjust, + struct hs_scratch *scratch, u32 ekey) { assert(scratch); assert(scratch->magic == SCRATCH_MAGIC); @@ -326,19 +152,7 @@ int roseDeliverReport(u64a offset, UNUSED ReportID id, ReportID onmatch, } #endif -#ifndef NDEBUG - // Assertions for development builds. 
- UNUSED const struct internal_report *ir = getInternalReport(ci->rose, id); - assert(isExternalReport(ir)); /* only external reports should reach here */ - assert(!can_stop_matching(scratch)); - assert(!ir->hasBounds || - (offset >= ir->minOffset && offset <= ir->maxOffset)); - assert(ir->type == EXTERNAL_CALLBACK); - assert(!ir->minLength); - assert(!ir->quashSom); -#endif - assert(ekey == INVALID_EKEY || !isExhausted(ci->rose, ci->exhaustionVector, ekey)); @@ -364,102 +178,6 @@ int roseDeliverReport(u64a offset, UNUSED ReportID id, ReportID onmatch, } } -static really_inline -int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, - struct hs_scratch *scratch, char is_simple) { - assert(id != MO_INVALID_IDX); // Should never get an invalid ID. - assert(scratch); - assert(scratch->magic == SCRATCH_MAGIC); - - u32 flags = 0; - - struct core_info *ci = &scratch->core_info; - const struct RoseEngine *rose = ci->rose; - const struct internal_report *ir = getInternalReport(rose, id); - - /* internal events should be handled by rose directly */ - assert(ir->type == EXTERNAL_CALLBACK); - - DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", to_offset, id, ir->type, ir->onmatch, - ir->offsetAdjust); - - if (unlikely(can_stop_matching(scratch))) { - DEBUG_PRINTF("pre broken - halting\n"); - return MO_HALT_MATCHING; - } - - if (!is_simple && ir->hasBounds) { - assert(ir->minOffset || ir->minLength || ir->maxOffset < MAX_OFFSET); - if (to_offset < ir->minOffset || to_offset > ir->maxOffset) { - DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", - to_offset, ir->minOffset, ir->maxOffset); - return MO_CONTINUE_MATCHING; - } - } - - int halt = 0; - - if (!is_simple && ir->ekey != INVALID_EKEY && - unlikely(isExhausted(ci->rose, ci->exhaustionVector, ir->ekey))) { - DEBUG_PRINTF("ate exhausted match\n"); - goto exit; - } - - u64a offset = to_offset; - - to_offset += ir->offsetAdjust; - assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - - if (!is_simple && ir->minLength) { - if (!satisfiesMinLength(ir->minLength, from_offset, to_offset)) { - return MO_CONTINUE_MATCHING; - } - if (ir->quashSom) { - from_offset = 0; - } - } - - DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ir->onmatch, ci->userContext); - -#ifndef RELEASE_BUILD - if (ir->offsetAdjust != 0) { - // alert testing tools that we've got adjusted matches - flags |= HS_MATCH_FLAG_ADJUSTED; - } -#endif - - enum DedupeResult dedupe_rv = - dedupeCatchupSom(rose, ir, scratch, offset, from_offset, to_offset); - switch (dedupe_rv) { - case DEDUPE_HALT: - halt = 1; - goto exit; - case DEDUPE_SKIP: - halt = 0; - goto exit; - case DEDUPE_CONTINUE: - break; - } - - halt = ci->userCallback((unsigned int)ir->onmatch, from_offset, to_offset, - flags, ci->userContext); - - if (!is_simple && ir->ekey != INVALID_EKEY) { - markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); - } - -exit: - if (halt) { - DEBUG_PRINTF("callback requested to terminate matches\n"); - ci->status |= STATUS_TERMINATED; - return MO_HALT_MATCHING; - } - - return MO_CONTINUE_MATCHING; -} - /** * \brief Deliver the given SOM report to the user callback. * @@ -467,42 +185,34 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, * that dedupe catchup has been done. 
*/ static really_inline -int roseDeliverSomReport(u64a from_offset, u64a to_offset, - const struct internal_report *ir, - struct hs_scratch *scratch, char is_exhaustible) { +int roseDeliverSomReport(u64a from_offset, u64a to_offset, ReportID onmatch, + s32 offset_adjust, struct hs_scratch *scratch, + u32 ekey) { assert(scratch); assert(scratch->magic == SCRATCH_MAGIC); - assert(isExternalReport(ir)); /* only external reports should reach here */ struct core_info *ci = &scratch->core_info; u32 flags = 0; #ifndef RELEASE_BUILD - if (ir->offsetAdjust != 0) { + if (offset_adjust) { // alert testing tools that we've got adjusted matches flags |= HS_MATCH_FLAG_ADJUSTED; } #endif assert(!can_stop_matching(scratch)); - assert(!ir->hasBounds || - (to_offset >= ir->minOffset && to_offset <= ir->maxOffset)); - assert(ir->ekey == INVALID_EKEY || - !isExhausted(ci->rose, ci->exhaustionVector, ir->ekey)); + assert(ekey == INVALID_EKEY || + !isExhausted(ci->rose, ci->exhaustionVector, ekey)); - to_offset += ir->offsetAdjust; + to_offset += offset_adjust; assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - assert(!ir->minLength || - satisfiesMinLength(ir->minLength, from_offset, to_offset)); - assert(!ir->quashSom || from_offset == 0); - DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ir->onmatch, ci->userContext); - + from_offset, to_offset, onmatch, ci->userContext); - int halt = ci->userCallback((unsigned int)ir->onmatch, from_offset, - to_offset, flags, ci->userContext); + int halt = ci->userCallback(onmatch, from_offset, to_offset, flags, + ci->userContext); if (halt) { DEBUG_PRINTF("callback requested to terminate matches\n"); @@ -510,9 +220,8 @@ int roseDeliverSomReport(u64a from_offset, u64a to_offset, return MO_HALT_MATCHING; } - if (is_exhaustible) { - assert(ir->ekey != INVALID_EKEY); - markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); + if (ekey != INVALID_EKEY) { + markAsMatched(ci->rose, ci->exhaustionVector, ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; diff --git a/src/rose/block.c b/src/rose/block.c index 749bf4e2f..5fc5c8a14 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -133,7 +133,6 @@ void init_outfixes_for_block(const struct RoseEngine *t, static really_inline void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback, char *state, char is_small_block) { init_state_for_block(t, state); @@ -144,8 +143,6 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, tctxt->delayLastEndOffset = 0; tctxt->lastEndOffset = 0; tctxt->filledDelayedSlots = 0; - tctxt->cb = callback; - tctxt->cb_som = som_callback; tctxt->lastMatchOffset = 0; tctxt->minMatchOffset = 0; tctxt->minNonMpvMatchOffset = 0; @@ -160,8 +157,7 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, init_outfixes_for_block(t, scratch, state, is_small_block); } -void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback) { +void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch) { assert(t); assert(scratch); assert(scratch->core_info.buf); @@ -179,8 +175,7 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, char *state = scratch->core_info.state; - init_for_block(t, scratch, callback, som_callback, state, - is_small_block); + init_for_block(t, scratch, state, 
is_small_block); struct RoseContext *tctxt = &scratch->tctxt; diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 11178675d..7c44bf9ff 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -33,6 +33,7 @@ #include "catchup.h" #include "match.h" +#include "program_runtime.h" #include "rose.h" #include "nfa/nfa_rev_api.h" #include "nfa/mpv.h" @@ -46,43 +47,18 @@ typedef struct queue_match PQ_T; #include "util/pqueue.h" static really_inline -int handleReportInternally(const struct RoseEngine *t, - struct hs_scratch *scratch, ReportID id, - u64a offset) { - const struct internal_report *ri = getInternalReport(t, id); - if (ri->type == EXTERNAL_CALLBACK) { - return 0; - } - if (isInternalSomReport(ri)) { - handleSomInternal(scratch, ri, offset); - return 1; - } - if (ri->type == INTERNAL_ROSE_CHAIN) { - roseHandleChainMatch(t, scratch, id, offset, 1); - return 1; - } - - return 0; -} +int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, + u64a som, u64a offset, ReportID id, const char from_mpv) { + assert(id < rose->reportProgramCount); + const u32 *programs = getByOffset(rose, rose->reportProgramOffset); -static really_inline -int handleReportInternallyNoChain(const struct RoseEngine *t, - struct hs_scratch *scratch, ReportID id, - u64a offset) { - const struct internal_report *ri = getInternalReport(t, id); - if (ri->type == EXTERNAL_CALLBACK) { - return 0; - } - if (isInternalSomReport(ri)) { - handleSomInternal(scratch, ri, offset); - return 1; - } - if (ri->type == INTERNAL_ROSE_CHAIN) { - assert(0); /* chained engines cannot trigger other engines */ - return 1; - } + const size_t match_len = 0; // Unused in this path. + const char in_anchored = 0; + const char in_catchup = 1; + roseRunProgram(rose, scratch, programs[id], som, offset, match_len, + in_anchored, in_catchup, from_mpv, 0); - return 0; + return can_stop_matching(scratch) ? 
MO_HALT_MATCHING : MO_CONTINUE_MATCHING; } static really_inline @@ -270,50 +246,15 @@ hwlmcb_rv_t runNewNfaToNextMatch(const struct RoseEngine *t, u32 qi, } /* for use by mpv (chained) only */ -static UNUSED +static int roseNfaFinalBlastAdaptor(u64a offset, ReportID id, void *context) { struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; - DEBUG_PRINTF("called\n"); - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", offset, id); - updateLastMatchOffset(tctxt, offset); - - if (handleReportInternallyNoChain(t, scratch, id, offset)) { - return MO_CONTINUE_MATCHING; - } - int cb_rv = tctxt->cb(offset, id, scratch); - if (cb_rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return MO_CONTINUE_MATCHING; - } else { - assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(t, 0, - scratch->core_info.exhaustionVector); - } -} - -/* for use by mpv (chained) only */ -static UNUSED -int roseNfaFinalBlastAdaptorNoInternal(u64a offset, ReportID id, - void *context) { - struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; - const struct RoseEngine *t = scratch->core_info.rose; - - DEBUG_PRINTF("called\n"); - /* chained nfas are run under the control of the anchored catchup */ - - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", - offset, id); - updateLastMatchOffset(tctxt, offset); - - int cb_rv = tctxt->cb(offset, id, scratch); + int cb_rv = roseNfaRunProgram(t, scratch, 0, offset, id, 1); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -418,11 +359,7 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, assert(!q->report_current); - if (info->only_external) { - q->cb = roseNfaFinalBlastAdaptorNoInternal; - } else { - q->cb = roseNfaFinalBlastAdaptor; - } + q->cb = roseNfaFinalBlastAdaptor; q->som_cb = NULL; DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", @@ -466,64 +403,28 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, : HWLM_CONTINUE_MATCHING; } -static UNUSED -int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { - struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; - const struct RoseEngine *t = scratch->core_info.rose; - - const struct internal_report *ri = getInternalReport(t, id); - - DEBUG_PRINTF("called\n"); - if (ri->type != INTERNAL_ROSE_CHAIN) { - /* INTERNAL_ROSE_CHAIN are not visible externally */ - if (roseCatchUpMPV(t, offset - scratch->core_info.buf_offset, - scratch) == HWLM_TERMINATE_MATCHING) { - DEBUG_PRINTF("done\n"); - return MO_HALT_MATCHING; - } - } - - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", - offset, id); - - if (handleReportInternally(t, scratch, id, offset)) { - return can_stop_matching(scratch) ? 
MO_HALT_MATCHING - : MO_CONTINUE_MATCHING; - } - - updateLastMatchOffset(tctxt, offset); - - int cb_rv = tctxt->cb(offset, id, scratch); - if (cb_rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return MO_CONTINUE_MATCHING; - } else { - assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(t, tctxt->curr_qi, - scratch->core_info.exhaustionVector); +static really_inline +char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) { + const struct RoseContext *tctxt = &scratch->tctxt; + assert(tctxt->curr_qi < rose->queueCount); + if (tctxt->curr_qi < rose->outfixBeginQueue) { + assert(getNfaByQueue(rose, tctxt->curr_qi)->type == MPV_NFA_0); + return 1; } + return 0; } -static UNUSED -int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { +static +int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { struct hs_scratch *scratch = context; struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; - DEBUG_PRINTF("called\n"); - if (roseCatchUpMPV(t, offset - scratch->core_info.buf_offset, scratch) == - HWLM_TERMINATE_MATCHING) { - DEBUG_PRINTF("done\n"); - return MO_HALT_MATCHING; - } - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", offset, id); - updateLastMatchOffset(tctxt, offset); - int cb_rv = tctxt->cb(offset, id, scratch); + const char from_mpv = in_mpv(t, scratch); + int cb_rv = roseNfaRunProgram(t, scratch, 0, offset, id, from_mpv); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -535,8 +436,8 @@ int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { } } -static UNUSED -int roseNfaBlastAdaptorNoChain(u64a offset, ReportID id, void *context) { +static +int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { struct hs_scratch *scratch = context; struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; @@ -544,13 +445,9 @@ int roseNfaBlastAdaptorNoChain(u64a offset, ReportID id, void *context) { DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", offset, id); - updateLastMatchOffset(tctxt, offset); + assert(!in_mpv(t, scratch)); - if (handleReportInternallyNoChain(t, scratch, id, offset)) { - return MO_CONTINUE_MATCHING; - } - - int cb_rv = tctxt->cb(offset, id, scratch); + int cb_rv = roseNfaRunProgram(t, scratch, 0, offset, id, 0); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -562,51 +459,21 @@ int roseNfaBlastAdaptorNoChain(u64a offset, ReportID id, void *context) { } } -static UNUSED -int roseNfaBlastAdaptorNoInternalNoChain(u64a offset, ReportID id, - void *context) { - struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; - const struct RoseEngine *t = scratch->core_info.rose; - - /* chained nfas are run under the control of the anchored catchup */ - - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", - offset, id); - updateLastMatchOffset(tctxt, offset); - - int cb_rv = tctxt->cb(offset, id, scratch); - if (cb_rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return MO_CONTINUE_MATCHING; - } else { - assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(t, tctxt->curr_qi, - scratch->core_info.exhaustionVector); - } 
-} - -static UNUSED +static int roseNfaBlastSomAdaptor(u64a from_offset, u64a offset, ReportID id, void *context) { struct hs_scratch *scratch = context; struct RoseContext *tctxt = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; - DEBUG_PRINTF("called\n"); - if (roseCatchUpMPV(t, offset - scratch->core_info.buf_offset, scratch) == - HWLM_TERMINATE_MATCHING) { - DEBUG_PRINTF("roseCatchUpNfas done\n"); - return MO_HALT_MATCHING; - } - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", offset, id); - updateLastMatchOffset(tctxt, offset); + + assert(!in_mpv(t, scratch)); /* must be a external report as haig cannot directly participate in chain */ - int cb_rv = tctxt->cb_som(from_offset, offset, id, scratch); + int cb_rv = roseNfaRunProgram(scratch->core_info.rose, scratch, from_offset, + offset, id, 0); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -620,38 +487,20 @@ int roseNfaBlastSomAdaptor(u64a from_offset, u64a offset, ReportID id, int roseNfaAdaptor(u64a offset, ReportID id, void *context) { struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); - updateLastMatchOffset(tctxt, offset); - - const struct RoseEngine *t = scratch->core_info.rose; - if (handleReportInternally(t, scratch, id, offset)) { - return can_stop_matching(scratch) ? MO_HALT_MATCHING - : MO_CONTINUE_MATCHING; - } - - return tctxt->cb(offset, id, scratch); -} - -int roseNfaAdaptorNoInternal(u64a offset, ReportID id, void *context) { - struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; - DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); - updateLastMatchOffset(tctxt, offset); - - return tctxt->cb(offset, id, scratch); + return roseNfaRunProgram(scratch->core_info.rose, scratch, 0, offset, id, + 0); } int roseNfaSomAdaptor(u64a from_offset, u64a offset, ReportID id, void *context) { struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); - updateLastMatchOffset(tctxt, offset); /* must be a external report as haig cannot directly participate in chain */ - return tctxt->cb_som(from_offset, offset, id, scratch); + return roseNfaRunProgram(scratch->core_info.rose, scratch, from_offset, + offset, id, 0); } static really_inline @@ -661,29 +510,17 @@ char blast_queue(const struct RoseEngine *t, struct hs_scratch *scratch, const struct NfaInfo *info = getNfaInfoByQueue(t, qi); tctxt->curr_qi = qi; - if (has_chained_nfas(t)) { - if (info->only_external) { - q->cb = roseNfaBlastAdaptorNoInternal; - } else { - q->cb = roseNfaBlastAdaptor; - } + if (info->only_external) { + q->cb = roseNfaBlastAdaptorNoInternal; } else { - if (info->only_external) { - q->cb = roseNfaBlastAdaptorNoInternalNoChain; - } else { - q->cb = roseNfaBlastAdaptorNoChain; - } + q->cb = roseNfaBlastAdaptor; } q->report_current = report_current; q->som_cb = roseNfaBlastSomAdaptor; DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", qi, q->cur, q->end, q_cur_loc(q), to_loc); char alive = nfaQueueExec(q->nfa, q, to_loc); - if (info->only_external) { - q->cb = roseNfaAdaptorNoInternal; - } else { - q->cb = roseNfaAdaptor; - } + q->cb = roseNfaAdaptor; q->som_cb = roseNfaSomAdaptor; assert(!q->report_current); @@ -945,14 +782,16 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char 
*state, s64a safe_loc, static never_inline hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc, s64a final_loc, struct hs_scratch *scratch) { - struct RoseContext *tctxt = &scratch->tctxt; assert(t->activeArrayCount); - assert(scratch->core_info.buf_offset + loc >= tctxt->minNonMpvMatchOffset); - DEBUG_PRINTF("roseCatchUpNfas %lld/%lld\n", loc, final_loc); + DEBUG_PRINTF("roseCatchUpNfas offset=%llu + %lld/%lld\n", + scratch->core_info.buf_offset, loc, final_loc); DEBUG_PRINTF("min non mpv match offset %llu\n", scratch->tctxt.minNonMpvMatchOffset); + struct RoseContext *tctxt = &scratch->tctxt; + assert(scratch->core_info.buf_offset + loc >= tctxt->minNonMpvMatchOffset); + char *state = scratch->core_info.state; struct mq *queues = scratch->queues; u8 *aa = getActiveLeafArray(t, state); diff --git a/src/rose/catchup.h b/src/rose/catchup.h index 692b184bd..24b843f5a 100644 --- a/src/rose/catchup.h +++ b/src/rose/catchup.h @@ -56,9 +56,6 @@ #include "ue2common.h" #include "util/multibit.h" - -/* Callbacks, defined in catchup.c */ - hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch); /* will only catch mpv up to last reported external match */ diff --git a/src/rose/eod.c b/src/rose/eod.c index 449f26f49..7e8d4b3d7 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -34,8 +34,7 @@ static really_inline void initContext(const struct RoseEngine *t, char *state, u64a offset, - struct hs_scratch *scratch, RoseCallback callback, - RoseCallbackSom som_callback) { + struct hs_scratch *scratch) { struct RoseContext *tctxt = &scratch->tctxt; tctxt->groups = loadGroups(t, state); /* TODO: diff groups for eod */ tctxt->lit_offset_adjust = scratch->core_info.buf_offset @@ -44,8 +43,6 @@ void initContext(const struct RoseEngine *t, char *state, u64a offset, tctxt->delayLastEndOffset = offset; tctxt->lastEndOffset = offset; tctxt->filledDelayedSlots = 0; - tctxt->cb = callback; - tctxt->cb_som = som_callback; tctxt->lastMatchOffset = 0; tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; @@ -109,38 +106,21 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset, DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset); + const u64a som = 0; const size_t match_len = 0; - if (roseRunProgram(t, scratch, t->eodIterProgramOffset, offset, match_len, - 0) == HWLM_TERMINATE_MATCHING) { + const char in_anchored = 0; + const char in_catchup = 0; + const char from_mpv = 0; + const char skip_mpv_catchup = 1; + if (roseRunProgram(t, scratch, t->eodIterProgramOffset, som, offset, + match_len, in_anchored, in_catchup, + from_mpv, skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } return MO_CONTINUE_MATCHING; } -/** - * \brief Adapts an NfaCallback to the rose callback specified in the - * RoseContext. - */ -static -int eodNfaCallback(u64a offset, ReportID report, void *context) { - struct hs_scratch *scratch = context; - assert(scratch->magic == SCRATCH_MAGIC); - return scratch->tctxt.cb(offset, report, scratch); -} - -/** - * \brief Adapts a SomNfaCallback to the rose SOM callback specified in the - * RoseContext. - */ -static -int eodNfaSomCallback(u64a from_offset, u64a to_offset, ReportID report, - void *context) { - struct hs_scratch *scratch = context; - assert(scratch->magic == SCRATCH_MAGIC); - return scratch->tctxt.cb_som(from_offset, to_offset, report, scratch); -} - /** * \brief Check for (and deliver) reports from active output-exposed (suffix * or outfix) NFAs. 
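Every adaptor this patch rewrites now funnels through the same dispatch: look up the per-report program for the ReportID and run it. A minimal sketch of that dispatch, using only names introduced by this patch (the flag constants vary by call site; the values shown are the ones roseNfaRunProgram() passes on the catchup path):

    /* Sketch only: mirrors roseNfaRunProgram() above, not a new API. */
    const u32 *programs = getByOffset(rose, rose->reportProgramOffset);
    assert(id < rose->reportProgramCount);
    const size_t match_len = 0; /* unused for NFA-driven reports */
    roseRunProgram(rose, scratch, programs[id], som, offset, match_len,
                   0 /* in_anchored */, 1 /* in_catchup */, from_mpv,
                   0 /* skip_mpv_catchup */);
    return can_stop_matching(scratch) ? MO_HALT_MATCHING
                                      : MO_CONTINUE_MATCHING;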
@@ -190,8 +170,8 @@ int roseCheckNfaEod(const struct RoseEngine *t, char *state, nfaExpandState(nfa, fstate, sstate, offset, key); } - if (nfaCheckFinalState(nfa, fstate, sstate, offset, eodNfaCallback, - eodNfaSomCallback, + if (nfaCheckFinalState(nfa, fstate, sstate, offset, roseReportAdaptor, + roseReportSomAdaptor, scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("user instructed us to stop\n"); return MO_HALT_MATCHING; @@ -239,8 +219,8 @@ void roseCheckEodSuffixes(const struct RoseEngine *t, char *state, u64a offset, * history buffer. */ char rv = nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); if (rv) { /* nfa is still alive */ - if (nfaCheckFinalState(nfa, fstate, sstate, offset, eodNfaCallback, - eodNfaSomCallback, + if (nfaCheckFinalState(nfa, fstate, sstate, offset, + roseReportAdaptor, roseReportSomAdaptor, scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("user instructed us to stop\n"); return; @@ -261,9 +241,15 @@ int roseRunEodProgram(const struct RoseEngine *t, u64a offset, // There should be no pending delayed literals. assert(!scratch->tctxt.filledDelayedSlots); + const u64a som = 0; const size_t match_len = 0; - if (roseRunProgram(t, scratch, t->eodProgramOffset, offset, match_len, 0) == - HWLM_TERMINATE_MATCHING) { + const char in_anchored = 0; + const char in_catchup = 0; + const char from_mpv = 0; + const char skip_mpv_catchup = 1; + if (roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, match_len, + in_anchored, in_catchup, from_mpv, + skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } @@ -322,10 +308,8 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, } void roseEodExec(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch, RoseCallback callback, - RoseCallbackSom som_callback) { + struct hs_scratch *scratch) { assert(scratch); - assert(callback); assert(t->requiresEodCheck); DEBUG_PRINTF("ci buf %p/%zu his %p/%zu\n", scratch->core_info.buf, scratch->core_info.len, scratch->core_info.hbuf, @@ -345,7 +329,7 @@ void roseEodExec(const struct RoseEngine *t, u64a offset, char *state = scratch->core_info.state; assert(state); - initContext(t, state, offset, scratch, callback, som_callback); + initContext(t, state, offset, scratch); roseEodExec_i(t, state, offset, scratch, 1); } diff --git a/src/rose/match.c b/src/rose/match.c index 96f090281..ef75b1134 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -96,9 +96,15 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, const u32 program = delayRebuildPrograms[id]; if (program) { + const u64a som = 0; const size_t match_len = end - start + 1; + const char in_anchored = 0; + const char in_catchup = 0; + const char from_mpv = 0; + const char skip_mpv_catchup = 0; UNUSED hwlmcb_rv_t rv = - roseRunProgram(t, scratch, program, real_end, match_len, 0); + roseRunProgram(t, scratch, program, som, real_end, match_len, + in_anchored, in_catchup, from_mpv, skip_mpv_catchup); assert(rv != HWLM_TERMINATE_MATCHING); } @@ -138,8 +144,10 @@ void recordAnchoredLiteralMatch(const struct RoseEngine *t, } hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, ReportID r, - u64a end, char in_catchup) { + struct hs_scratch *scratch, u32 event, + u64a top_squash_distance, u64a end, + char in_catchup) { + assert(event == MQE_TOP || event >= MQE_TOP_FIRST); struct core_info *ci = &scratch->core_info; u8 *aa = getActiveLeafArray(t, scratch->core_info.state); @@ -147,18 +155,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct 
RoseEngine *t, struct fatbit *activeQueues = scratch->aqa; u32 qCount = t->queueCount; - const struct internal_report *ri = getInternalReport(t, r); - assert(ri->type == INTERNAL_ROSE_CHAIN); - - u32 qi = 0; /* MPV is always queue 0 if it exists */ - u32 event = ri->onmatch; - assert(event == MQE_TOP || event >= MQE_TOP_FIRST); - - /* TODO: populate INTERNAL_ROSE_CHAIN internal reports with offset where - * possible */ - if (end < ri->minOffset || (ri->maxOffset && end > ri->maxOffset)) { - return HWLM_CONTINUE_MATCHING; - } + const u32 qi = 0; /* MPV is always queue 0 if it exists */ struct mq *q = &scratch->queues[qi]; const struct NfaInfo *info = getNfaInfoByQueue(t, qi); @@ -189,11 +186,11 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, } } - if (ri->aux.topSquashDistance) { + if (top_squash_distance) { assert(q->cur != q->end); struct mq_item *last = &q->items[q->end - 1]; if (last->type == event - && last->location >= loc - (s64a)ri->aux.topSquashDistance) { + && last->location >= loc - (s64a)top_squash_distance) { last->location = loc; goto event_enqueued; } @@ -255,8 +252,14 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { const u32 *programs = getByOffset(t, t->litProgramOffset); assert(id < t->literalCount); - if (roseRunProgram(t, scratch, programs[id], real_end, match_len, 1) == - HWLM_TERMINATE_MATCHING) { + const u64a som = 0; + const char in_anchored = 1; + const char in_catchup = 0; + const char from_mpv = 0; + const char skip_mpv_catchup = 0; + if (roseRunProgram(t, scratch, programs[id], som, real_end, match_len, + in_anchored, in_catchup, from_mpv, + skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(scratch)); DEBUG_PRINTF("caller requested termination\n"); return MO_HALT_MATCHING; @@ -280,7 +283,13 @@ hwlmcb_rv_t roseProcessMatch(const struct RoseEngine *t, DEBUG_PRINTF("id=%u\n", id); const u32 *programs = getByOffset(t, t->litProgramOffset); assert(id < t->literalCount); - return roseRunProgram(t, scratch, programs[id], end, match_len, 0); + const u64a som = 0; + const char in_anchored = 0; + const char in_catchup = 0; + const char from_mpv = 0; + const char skip_mpv_catchup = 0; + return roseRunProgram(t, scratch, programs[id], som, end, match_len, + in_anchored, in_catchup, from_mpv, skip_mpv_catchup); } static rose_inline @@ -568,11 +577,17 @@ hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, struct hs_scratch *scratch = context; struct core_info *ci = &scratch->core_info; const u64a real_end = (u64a)end + ci->buf_offset + 1; + const u64a som = 0; const size_t match_len = end - start + 1; const struct RoseEngine *rose = ci->rose; const u32 *programs = getByOffset(rose, rose->litProgramOffset); assert(id < rose->literalCount); - return roseRunProgram(rose, scratch, programs[id], real_end, match_len, 0); + const char in_anchored = 0; + const char in_catchup = 0; + const char from_mpv = 0; + const char skip_mpv_catchup = 0; + return roseRunProgram(rose, scratch, programs[id], som, real_end, match_len, + in_anchored, in_catchup, from_mpv, skip_mpv_catchup); } /** @@ -606,13 +621,53 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, // time we are running boundary report programs. 
scratch->tctxt.minMatchOffset = stream_offset; + const u64a som = 0; const size_t match_len = 0; const char in_anchored = 0; - hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, stream_offset, - match_len, in_anchored); + const char in_catchup = 0; + const char from_mpv = 0; + const char skip_mpv_catchup = 0; + hwlmcb_rv_t rv = + roseRunProgram(rose, scratch, program, som, stream_offset, match_len, + in_anchored, in_catchup, from_mpv, skip_mpv_catchup); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } return MO_CONTINUE_MATCHING; } + +static really_inline +int roseReportAdaptor_i(u64a som, u64a offset, ReportID id, void *context) { + struct hs_scratch *scratch = context; + assert(scratch && scratch->magic == SCRATCH_MAGIC); + + const struct RoseEngine *rose = scratch->core_info.rose; + + assert(id < rose->reportProgramCount); + const u32 *programs = getByOffset(rose, rose->reportProgramOffset); + + const size_t match_len = 0; // Unused in this path. + const char in_anchored = 0; + const char in_catchup = 0; + const char from_mpv = 0; + const char skip_mpv_catchup = 1; + hwlmcb_rv_t rv = + roseRunProgram(rose, scratch, programs[id], som, offset, match_len, + in_anchored, in_catchup, from_mpv, skip_mpv_catchup); + if (rv == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + + return can_stop_matching(scratch) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING; +} + +int roseReportAdaptor(u64a offset, ReportID id, void *context) { + DEBUG_PRINTF("offset=%llu, id=%u\n", offset, id); + return roseReportAdaptor_i(0, offset, id, context); +} + +int roseReportSomAdaptor(u64a som, u64a offset, ReportID id, void *context) { + DEBUG_PRINTF("som=%llu, offset=%llu, id=%u\n", som, offset, id); + return roseReportAdaptor_i(som, offset, id, context); +} diff --git a/src/rose/match.h b/src/rose/match.h index d62fe553e..cee32fc24 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -41,13 +41,11 @@ #include "som/som_runtime.h" #include "util/bitutils.h" #include "util/fatbit.h" -#include "util/internal_report.h" #include "util/multibit.h" /* Callbacks, defined in catchup.c */ int roseNfaAdaptor(u64a offset, ReportID id, void *context); -int roseNfaAdaptorNoInternal(u64a offset, ReportID id, void *context); int roseNfaSomAdaptor(u64a from_offset, u64a offset, ReportID id, void *context); /* Callbacks, defined in match.c */ @@ -60,8 +58,9 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx); /* Common code, used all over Rose runtime */ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, ReportID r, - u64a end, char in_catchup); + struct hs_scratch *scratch, u32 event, + u64a top_squash_distance, u64a end, + char in_catchup); static really_inline void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, @@ -78,11 +77,7 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, q->length = scratch->core_info.len; q->history = scratch->core_info.hbuf; q->hlength = scratch->core_info.hlen; - if (info->only_external) { - q->cb = roseNfaAdaptorNoInternal; - } else { - q->cb = roseNfaAdaptor; - } + q->cb = roseNfaAdaptor; q->som_cb = roseNfaSomAdaptor; q->context = scratch; q->report_current = 0; diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 2dd3ba8bf..db1dc8c16 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -573,14 +573,13 @@ void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch, static rose_inline hwlmcb_rv_t roseReport(const struct RoseEngine *t, 
struct hs_scratch *scratch, - u64a end, ReportID id, ReportID onmatch, - s32 offset_adjust, u32 ekey) { + u64a end, ReportID onmatch, s32 offset_adjust, + u32 ekey) { assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); - DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end); + DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end); updateLastMatchOffset(&scratch->tctxt, end); - int cb_rv = roseDeliverReport(end, id, onmatch, offset_adjust, scratch, - ekey); + int cb_rv = roseDeliverReport(end, onmatch, offset_adjust, scratch, ekey); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; @@ -599,47 +598,45 @@ hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, static rose_inline hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, struct hs_scratch *scratch, - ReportID r, u64a end) { - if (roseCatchUpMpvFeeders(t, scratch, end) == HWLM_TERMINATE_MATCHING) { + u32 event, u64a top_squash_distance, + u64a end, const char in_catchup) { + if (!in_catchup && + roseCatchUpMpvFeeders(t, scratch, end) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - - return roseHandleChainMatch(t, scratch, r, end, 0); + return roseHandleChainMatch(t, scratch, event, top_squash_distance, end, + in_catchup); } static rose_inline -void roseHandleSom(const struct RoseEngine *t, struct hs_scratch *scratch, - ReportID id, u64a end) { - DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end, - scratch->tctxt.minMatchOffset); - - // Reach into reports and handle internal reports that just manipulate SOM - // slots ourselves, rather than going through the callback. +void roseHandleSom(UNUSED const struct RoseEngine *t, + struct hs_scratch *scratch, const struct som_operation *sr, + u64a end) { + DEBUG_PRINTF("end=%llu, minMatchOffset=%llu\n", end, + scratch->tctxt.minMatchOffset); assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); - DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end); updateLastMatchOffset(&scratch->tctxt, end); - - const struct internal_report *ri = getInternalReport(t, id); - handleSomInternal(scratch, ri, end); + handleSomInternal(scratch, sr, end); } static rose_inline hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, - struct hs_scratch *scratch, ReportID id, u64a start, - u64a end, char is_exhaustible) { + struct hs_scratch *scratch, u64a start, u64a end, + ReportID onmatch, s32 offset_adjust, u32 ekey) { assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); - DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end); + DEBUG_PRINTF("firing som callback onmatch=%u, start=%llu, end=%llu\n", + onmatch, start, end); updateLastMatchOffset(&scratch->tctxt, end); - const struct internal_report *ir = getInternalReport(t, id); - int cb_rv = roseDeliverSomReport(start, end, ir, scratch, is_exhaustible); + int cb_rv = roseDeliverSomReport(start, end, onmatch, offset_adjust, + scratch, ekey); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; } - if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return HWLM_CONTINUE_MATCHING; } @@ -647,19 +644,15 @@ hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, } static rose_inline -void roseHandleSomSom(const struct RoseEngine *t, ReportID id, u64a start, - u64a end, struct hs_scratch *scratch) { - DEBUG_PRINTF("id=%u, 
start=%llu, end=%llu, minMatchOffset=%llu\n", - id, start, end, scratch->tctxt.minMatchOffset); - - // Reach into reports and handle internal reports that just manipulate SOM - // slots ourselves, rather than going through the callback. +void roseHandleSomSom(UNUSED const struct RoseEngine *t, + struct hs_scratch *scratch, + const struct som_operation *sr, u64a start, u64a end) { + DEBUG_PRINTF("start=%llu, end=%llu, minMatchOffset=%llu\n", start, end, + scratch->tctxt.minMatchOffset); assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); updateLastMatchOffset(&scratch->tctxt, end); - - const struct internal_report *ri = getInternalReport(t, id); - setSomFromSomAware(scratch, ri, start, end); + setSomFromSomAware(scratch, sr, start, end); } static really_inline @@ -807,6 +800,16 @@ char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { return end >= min_bound && end <= max_bound; } +static +void updateSeqPoint(struct RoseContext *tctxt, u64a offset, + const char from_mpv) { + if (from_mpv) { + updateMinMatchOffsetFromMpv(tctxt, offset); + } else { + updateMinMatchOffset(tctxt, offset); + } +} + #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ @@ -822,8 +825,10 @@ char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { static rose_inline hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, - u64a end, size_t match_len, char in_anchored) { - DEBUG_PRINTF("program begins at offset %u\n", programOffset); + u64a som, u64a end, size_t match_len, + char in_anchored, char in_catchup, char from_mpv, + char skip_mpv_catchup) { + DEBUG_PRINTF("program=%u, offsets [%llu,%llu]\n", programOffset, som, end); assert(programOffset); assert(programOffset < t->size); @@ -831,8 +836,6 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, const char *pc_base = getByOffset(t, programOffset); const char *pc = pc_base; - u64a som = 0; - // Local sparse iterator state for programs that use the SPARSE_ITER_BEGIN // and SPARSE_ITER_NEXT instructions. 
struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; @@ -972,6 +975,17 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP_MPV) { + if (from_mpv || skip_mpv_catchup) { + DEBUG_PRINTF("skipping mpv catchup\n"); + } else if (roseCatchUpMPV(t, + end - scratch->core_info.buf_offset, + scratch) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { assert(ri->distance <= end); som = end - ri->distance; @@ -986,10 +1000,9 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SOM_FROM_REPORT) { - const struct internal_report *ir = - getInternalReport(t, ri->report); - som = handleSomExternal(scratch, ir, end); - DEBUG_PRINTF("som from report %u is %llu\n", ri->report, som); + som = handleSomExternal(scratch, &ri->som, end); + DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, + som); } PROGRAM_NEXT_INSTRUCTION @@ -1016,11 +1029,13 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE) { - const struct internal_report *ir = - getInternalReport(t, ri->report); - const char do_som = t->hasSom; // FIXME: constant propagate - enum DedupeResult rv = dedupeCatchup( - t, ir, scratch, end, som, end + ir->offsetAdjust, do_som); + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); switch (rv) { case DEDUPE_HALT: return HWLM_TERMINATE_MATCHING; @@ -1035,10 +1050,13 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE_SOM) { - const struct internal_report *ir = - getInternalReport(t, ri->report); - enum DedupeResult rv = dedupeCatchupSom( - t, ir, scratch, end, som, end + ir->offsetAdjust); + updateSeqPoint(tctxt, end, from_mpv); + const char is_external_report = 0; + const char do_som = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); switch (rv) { case DEDUPE_HALT: return HWLM_TERMINATE_MATCHING; @@ -1053,9 +1071,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_CHAIN) { - if (roseCatchUpAndHandleChainMatch(t, scratch, ri->report, - end) == - HWLM_TERMINATE_MATCHING) { + // Note: sequence points updated inside this function. 
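+                // (Passing in_catchup through skips the MPV feeder catchup
+                // inside roseCatchUpAndHandleChainMatch(), so the MPV cannot
+                // re-enter its own catchup loop.)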
+ if (roseCatchUpAndHandleChainMatch( + t, scratch, ri->event, ri->top_squash_distance, end, + in_catchup) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1063,20 +1082,22 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_INT) { - roseHandleSom(t, scratch, ri->report, end); + updateSeqPoint(tctxt, end, from_mpv); + roseHandleSom(t, scratch, &ri->som, end); work_done = 1; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_AWARE) { - roseHandleSomSom(t, ri->report, som, end, scratch); + updateSeqPoint(tctxt, end, from_mpv); + roseHandleSomSom(t, scratch, &ri->som, som, end); work_done = 1; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT) { - if (roseReport(t, scratch, end, ri->report, ri->onmatch, - ri->offset_adjust, + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -1085,8 +1106,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_EXHAUST) { - if (roseReport(t, scratch, end, ri->report, ri->onmatch, - ri->offset_adjust, + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, ri->ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -1095,9 +1116,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM) { - const char is_exhaustible = 0; - if (roseReportSom(t, scratch, ri->report, som, end, - is_exhaustible) == HWLM_TERMINATE_MATCHING) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReportSom(t, scratch, som, end, ri->onmatch, + ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1105,9 +1127,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_EXHAUST) { - const char is_exhaustible = 1; - if (roseReportSom(t, scratch, ri->report, som, end, - is_exhaustible) == HWLM_TERMINATE_MATCHING) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReportSom(t, scratch, som, end, ri->onmatch, + ri->offset_adjust, + ri->ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1115,11 +1138,13 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE_AND_REPORT) { - const struct internal_report *ir = - getInternalReport(t, ri->report); - const char do_som = t->hasSom; // FIXME: constant propagate - enum DedupeResult rv = dedupeCatchup( - t, ir, scratch, end, som, end + ir->offsetAdjust, do_som); + updateSeqPoint(tctxt, end, from_mpv); + const char do_som = t->hasSom; // TODO: constant propagate + const char is_external_report = 1; + enum DedupeResult rv = + dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, + ri->dkey, ri->offset_adjust, + is_external_report, ri->quash_som, do_som); switch (rv) { case DEDUPE_HALT: return HWLM_TERMINATE_MATCHING; @@ -1132,8 +1157,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } const u32 ekey = INVALID_EKEY; - if (roseReport(t, scratch, end, ri->report, ir->onmatch, - ir->offsetAdjust, + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } diff --git a/src/rose/rose.h b/src/rose/rose.h index a9058379e..e90d2f216 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -41,8 
+41,7 @@ void roseInitState(const struct RoseEngine *t, char *state); void roseBlockEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch); -void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback); +void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch); static really_inline int roseBlockHasEodWork(const struct RoseEngine *t, @@ -79,8 +78,7 @@ int roseBlockHasEodWork(const struct RoseEngine *t, /* assumes core_info in scratch has been init to point to data */ static really_inline -void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback) { +void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) { assert(t); assert(scratch); assert(scratch->core_info.buf); @@ -101,7 +99,7 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF || length <= t->maxBiAnchoredWidth); - roseBlockExec_i(t, scratch, callback, som_callback); + roseBlockExec_i(t, scratch); if (!t->requiresEodCheck) { return; @@ -121,16 +119,17 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, } /* assumes core_info in scratch has been init to point to data */ -void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback); +void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch); void roseEodExec(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch, RoseCallback callback, - RoseCallbackSom som_callback); + struct hs_scratch *scratch); hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, void *context); +int roseReportAdaptor(u64a offset, ReportID id, void *context); +int roseReportSomAdaptor(u64a som, u64a offset, ReportID id, void *context); + int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, u64a stream_offset, struct hs_scratch *scratch); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 7fe29538a..add3ac2d8 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -68,7 +68,6 @@ #include "util/compile_error.h" #include "util/container.h" #include "util/graph_range.h" -#include "util/internal_report.h" #include "util/multibit_build.h" #include "util/order_check.h" #include "util/queue_index_factory.h" @@ -196,6 +195,7 @@ class RoseInstruction { case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; case ROSE_INSTR_CATCH_UP: return &u.catchUp; + case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv; case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; case ROSE_INSTR_SOM_FROM_REPORT: return &u.somFromReport; @@ -206,7 +206,7 @@ class RoseInstruction { case ROSE_INSTR_DEDUPE_SOM: return &u.dedupeSom; case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain; case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; - case ROSE_INSTR_REPORT_SOM_AWARE: return &u.reportSom; + case ROSE_INSTR_REPORT_SOM_AWARE: return &u.reportSomAware; case ROSE_INSTR_REPORT: return &u.report; case ROSE_INSTR_REPORT_EXHAUST: return &u.reportExhaust; case ROSE_INSTR_REPORT_SOM: return &u.reportSom; @@ -240,6 +240,7 @@ class RoseInstruction { case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); case ROSE_INSTR_PUSH_DELAYED: return 
sizeof(u.pushDelayed); case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp); + case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv); case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); case ROSE_INSTR_SOM_FROM_REPORT: return sizeof(u.somFromReport); @@ -250,7 +251,7 @@ class RoseInstruction { case ROSE_INSTR_DEDUPE_SOM: return sizeof(u.dedupeSom); case ROSE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); - case ROSE_INSTR_REPORT_SOM_AWARE: return sizeof(u.reportSom); + case ROSE_INSTR_REPORT_SOM_AWARE: return sizeof(u.reportSomAware); case ROSE_INSTR_REPORT: return sizeof(u.report); case ROSE_INSTR_REPORT_EXHAUST: return sizeof(u.reportExhaust); case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); @@ -283,6 +284,7 @@ class RoseInstruction { ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; ROSE_STRUCT_PUSH_DELAYED pushDelayed; ROSE_STRUCT_CATCH_UP catchUp; + ROSE_STRUCT_CATCH_UP_MPV catchUpMpv; ROSE_STRUCT_SOM_ADJUST somAdjust; ROSE_STRUCT_SOM_LEFTFIX somLeftfix; ROSE_STRUCT_SOM_FROM_REPORT somFromReport; @@ -396,6 +398,9 @@ struct build_context : boost::noncopyable { * matches, suffixes, outfixes etc. */ bool needs_catchup = false; + /** \brief True if this Rose engine has an MPV engine. */ + bool needs_mpv_catchup = false; + /** \brief Resources in use (tracked as programs are added). */ RoseResources resources; @@ -578,7 +583,7 @@ bool isPureFloating(const RoseResources &resources) { } static -bool isSingleOutfix(const RoseBuildImpl &tbi, u32 outfixEndQueue) { +bool isSingleOutfix(const RoseBuildImpl &tbi) { for (auto v : vertices_range(tbi.g)) { if (tbi.isAnyStart(v)) { continue; @@ -598,12 +603,12 @@ bool isSingleOutfix(const RoseBuildImpl &tbi, u32 outfixEndQueue) { return false; /* streaming runtime makes liberal use of broken flag */ } - return outfixEndQueue == 1; + return tbi.outfixes.size() == 1; } static u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, - u32 outfixEndQueue) { + UNUSED u32 outfixEndQueue) { DEBUG_PRINTF("has_outfixes=%d\n", bc.resources.has_outfixes); DEBUG_PRINTF("has_suffixes=%d\n", bc.resources.has_suffixes); DEBUG_PRINTF("has_leftfixes=%d\n", bc.resources.has_leftfixes); @@ -618,13 +623,38 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, return ROSE_RUNTIME_PURE_LITERAL; } - if (isSingleOutfix(build, outfixEndQueue)) { + if (isSingleOutfix(build)) { return ROSE_RUNTIME_SINGLE_OUTFIX; } return ROSE_RUNTIME_FULL_ROSE; } +/** + * \brief True if this Rose engine needs to run MPV catch up in front of + * non-MPV reports. 
+ */ +static +bool needsMpvCatchup(const RoseBuildImpl &build) { + const auto &outfixes = build.outfixes; + bool has_mpv = + any_of(begin(outfixes), end(outfixes), [](const OutfixInfo &outfix) { + return outfix.is_nonempty_mpv(); + }); + + if (!has_mpv) { + DEBUG_PRINTF("no mpv\n"); + return false; + } + + if (isSingleOutfix(build)) { + DEBUG_PRINTF("single outfix\n"); + return false; + } + + return true; +} + static void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, u32 anchorStateSize, u32 activeArrayCount, @@ -1941,32 +1971,6 @@ struct DerivedBoundaryReports { set report_at_0_eod_full; }; -static -void fillInReportInfo(RoseEngine *engine, u32 reportOffset, - const ReportManager &rm, const vector &reports) { - internal_report *dest = (internal_report *)((char *)engine + reportOffset); - engine->intReportOffset = reportOffset; - engine->intReportCount = (u32)reports.size(); - - assert(ISALIGNED(dest)); - - for (const auto &report : reports) { - writeInternalReport(report, rm, dest++); - } - - DEBUG_PRINTF("%zu reports of size %zu\n", reports.size(), - sizeof(internal_report)); -} - -static -bool hasSimpleReports(const vector &reports) { - auto it = find_if(reports.begin(), reports.end(), isComplexReport); - - DEBUG_PRINTF("runtime has %scomplex reports\n", - it == reports.end() ? "no " : ""); - return it == reports.end(); -} - static void prepSomRevNfas(const SomSlotManager &ssm, u32 *rev_nfa_table_offset, vector *nfa_offsets, u32 *currOffset) { @@ -2473,16 +2477,22 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, } static -void makeDedupe(const ReportID id, vector &report_block) { +void makeDedupe(const RoseBuildImpl &build, const Report &report, + vector &report_block) { auto ri = RoseInstruction(ROSE_INSTR_DEDUPE, JumpTarget::NEXT_BLOCK); - ri.u.dedupe.report = id; + ri.u.dedupe.quash_som = report.quashSom; + ri.u.dedupe.dkey = build.rm.getDkey(report); + ri.u.dedupe.offset_adjust = report.offsetAdjust; report_block.push_back(move(ri)); } static -void makeDedupeSom(const ReportID id, vector &report_block) { +void makeDedupeSom(const RoseBuildImpl &build, const Report &report, + vector &report_block) { auto ri = RoseInstruction(ROSE_INSTR_DEDUPE_SOM, JumpTarget::NEXT_BLOCK); - ri.u.dedupeSom.report = id; + ri.u.dedupeSom.quash_som = report.quashSom; + ri.u.dedupeSom.dkey = build.rm.getDkey(report); + ri.u.dedupeSom.offset_adjust = report.offsetAdjust; report_block.push_back(move(ri)); } @@ -2510,6 +2520,92 @@ void makeCatchup(RoseBuildImpl &build, build_context &bc, program.emplace_back(ROSE_INSTR_CATCH_UP); } +static +void makeCatchupMpv(RoseBuildImpl &build, build_context &bc, ReportID id, + vector &program) { + if (!bc.needs_mpv_catchup) { + return; + } + + const Report &report = build.rm.getReport(id); + if (report.type == INTERNAL_ROSE_CHAIN) { + return; + } + + program.emplace_back(ROSE_INSTR_CATCH_UP_MPV); +} + +static +void writeSomOperation(const Report &report, som_operation *op) { + assert(op); + + switch (report.type) { + case EXTERNAL_CALLBACK_SOM_REL: + op->type = SOM_EXTERNAL_CALLBACK_REL; + break; + case INTERNAL_SOM_LOC_SET: + op->type = SOM_INTERNAL_LOC_SET; + break; + case INTERNAL_SOM_LOC_SET_IF_UNSET: + op->type = SOM_INTERNAL_LOC_SET_IF_UNSET; + break; + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + op->type = 
SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_COPY: + op->type = SOM_INTERNAL_LOC_COPY; + break; + case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_MAKE_WRITABLE: + op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE; + break; + case EXTERNAL_CALLBACK_SOM_STORED: + op->type = SOM_EXTERNAL_CALLBACK_STORED; + break; + case EXTERNAL_CALLBACK_SOM_ABS: + op->type = SOM_EXTERNAL_CALLBACK_ABS; + break; + case EXTERNAL_CALLBACK_SOM_REV_NFA: + op->type = SOM_EXTERNAL_CALLBACK_REV_NFA; + break; + case INTERNAL_SOM_LOC_SET_FROM: + op->type = SOM_INTERNAL_LOC_SET_FROM; + break; + case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE; + break; + default: + // This report doesn't correspond to a SOM operation. + assert(0); + throw CompileError("Unable to generate bytecode."); + } + + op->onmatch = report.onmatch; + + switch (report.type) { + case EXTERNAL_CALLBACK_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + op->aux.revNfaIndex = report.revNfaIndex; + break; + default: + op->aux.somDistance = report.somDistance; + break; + } +} + static void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, vector &program) { @@ -2536,10 +2632,12 @@ void makeReport(RoseBuildImpl &build, const ReportID id, report_block.push_back(move(ri)); } - // External SOM reports need their SOM value calculated. - if (isExternalSomReport(report)) { + // External SOM reports that aren't passthrough need their SOM value + // calculated. 
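+    // (EXTERNAL_CALLBACK_SOM_PASS reports receive their SOM value directly
+    // from the engine that fired them, so they need no SOM_FROM_REPORT.)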
+ if (isExternalSomReport(report) && + report.type != EXTERNAL_CALLBACK_SOM_PASS) { auto ri = RoseInstruction(ROSE_INSTR_SOM_FROM_REPORT); - ri.u.somFromReport.report = id; + writeSomOperation(report, &ri.u.somFromReport.som); report_block.push_back(move(ri)); } @@ -2567,33 +2665,40 @@ void makeReport(RoseBuildImpl &build, const ReportID id, if (needs_dedupe) { report_block.emplace_back(ROSE_INSTR_DEDUPE_AND_REPORT, JumpTarget::NEXT_BLOCK); - report_block.back().u.dedupeAndReport.report = id; + auto &ri = report_block.back(); + ri.u.dedupeAndReport.quash_som = report.quashSom; + ri.u.dedupeAndReport.dkey = build.rm.getDkey(report); + ri.u.dedupeAndReport.onmatch = report.onmatch; + ri.u.dedupeAndReport.offset_adjust = report.offsetAdjust; } else { report_block.emplace_back(ROSE_INSTR_REPORT); auto &ri = report_block.back(); - ri.u.report.report = id; ri.u.report.onmatch = report.onmatch; ri.u.report.offset_adjust = report.offsetAdjust; } } else { if (needs_dedupe) { - makeDedupe(id, report_block); + makeDedupe(build, report, report_block); } report_block.emplace_back(ROSE_INSTR_REPORT_EXHAUST); auto &ri = report_block.back(); - ri.u.reportExhaust.report = id; ri.u.reportExhaust.onmatch = report.onmatch; ri.u.reportExhaust.offset_adjust = report.offsetAdjust; ri.u.reportExhaust.ekey = report.ekey; } } else { // has_som - makeDedupeSom(id, report_block); + makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - report_block.back().u.reportSom.report = id; + auto &ri = report_block.back(); + ri.u.reportSom.onmatch = report.onmatch; + ri.u.reportSom.offset_adjust = report.offsetAdjust; } else { report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - report_block.back().u.reportSomExhaust.report = id; + auto &ri = report_block.back(); + ri.u.reportSomExhaust.onmatch = report.onmatch; + ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; + ri.u.reportSomExhaust.ekey = report.ekey; } } break; @@ -2610,29 +2715,55 @@ void makeReport(RoseBuildImpl &build, const ReportID id, case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: if (has_som) { report_block.emplace_back(ROSE_INSTR_REPORT_SOM_AWARE); - report_block.back().u.reportSomAware.report = id; + auto &ri = report_block.back(); + writeSomOperation(report, &ri.u.reportSomAware.som); } else { report_block.emplace_back(ROSE_INSTR_REPORT_SOM_INT); - report_block.back().u.reportSomInt.report = id; + auto &ri = report_block.back(); + writeSomOperation(report, &ri.u.reportSomInt.som); } break; - case INTERNAL_ROSE_CHAIN: + case INTERNAL_ROSE_CHAIN: { report_block.emplace_back(ROSE_INSTR_REPORT_CHAIN); - report_block.back().u.reportChain.report = id; + auto &ri = report_block.back(); + ri.u.reportChain.event = report.onmatch; + ri.u.reportChain.top_squash_distance = report.topSquashDistance; break; + } case EXTERNAL_CALLBACK_SOM_REL: case EXTERNAL_CALLBACK_SOM_STORED: case EXTERNAL_CALLBACK_SOM_ABS: case EXTERNAL_CALLBACK_SOM_REV_NFA: - makeDedupeSom(id, report_block); + makeDedupeSom(build, report, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM); + auto &ri = report_block.back(); + ri.u.reportSom.onmatch = report.onmatch; + ri.u.reportSom.offset_adjust = report.offsetAdjust; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); + auto &ri = report_block.back(); + ri.u.reportSomExhaust.onmatch = report.onmatch; + ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; + ri.u.reportSomExhaust.ekey = 
report.ekey; + } + break; + case EXTERNAL_CALLBACK_SOM_PASS: + makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - report_block.back().u.reportSom.report = id; + auto &ri = report_block.back(); + ri.u.reportSom.onmatch = report.onmatch; + ri.u.reportSom.offset_adjust = report.offsetAdjust; } else { report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - report_block.back().u.reportSomExhaust.report = id; + auto &ri = report_block.back(); + ri.u.reportSomExhaust.onmatch = report.onmatch; + ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; + ri.u.reportSomExhaust.ekey = report.ekey; } break; + default: assert(0); throw CompileError("Unable to generate bytecode."); @@ -3571,6 +3702,26 @@ pair buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { return {litProgramsOffset, delayRebuildProgramsOffset}; } +static +u32 buildReportPrograms(RoseBuildImpl &build, build_context &bc) { + const auto &rm = build.rm; + const u32 numReports = verify_u32(rm.numReports()); + vector programs(numReports); + + vector program; + for (ReportID id = 0; id < numReports; id++) { + program.clear(); + const bool has_som = false; + makeCatchupMpv(build, bc, id, program); + makeReport(build, id, has_som, program); + programs[id] = writeProgram(bc, flattenProgram({program})); + DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id, + programs.back(), program.size()); + } + + return add_to_engine_blob(bc, begin(programs), end(programs)); +} + static vector makeEodAnchorProgram(RoseBuildImpl &build, build_context &bc, @@ -3787,6 +3938,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { if (!anchored_dfas.empty()) { bc.resources.has_anchored = true; } + bc.needs_mpv_catchup = needsMpvCatchup(*this); auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary); @@ -3835,6 +3987,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 eodIterOffset; tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc); + u32 reportProgramOffset = buildReportPrograms(*this, bc); + vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); @@ -3900,12 +4054,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset += verify_u32(sbsize); } - const vector &int_reports = rm.reports(); - - currOffset = ROUNDUP_CL(currOffset); - u32 intReportOffset = currOffset; - currOffset += sizeof(internal_report) * int_reports.size(); - u32 leftOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo)); u32 roseLen = sizeof(LeftNfaInfo) * leftInfoTable.size(); currOffset = leftOffset + roseLen; @@ -4004,14 +4152,13 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->somHorizon = ssm.somPrecision(); engine->somLocationCount = ssm.numSomSlots(); - engine->simpleCallback = !rm.numEkeys() && hasSimpleReports(rm.reports()); engine->needsCatchup = bc.needs_catchup ? 
1 : 0; - fillInReportInfo(engine.get(), intReportOffset, rm, int_reports); - engine->literalCount = verify_u32(final_id_to_literal.size()); engine->litProgramOffset = litProgramOffset; engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset; + engine->reportProgramOffset = reportProgramOffset; + engine->reportProgramCount = verify_u32(rm.reports().size()); engine->runtimeImpl = pickRuntimeImpl(*this, bc, outfixEndQueue); engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 5b992fcb4..fd15e3963 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -2833,7 +2833,7 @@ void mergePuffixes(RoseBuildImpl &tbi) { u32 squashDistance = allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v); - Report ir = makeRoseTrigger(event, squashDistance); + Report ir = makeMpvTrigger(event, squashDistance); ReportID id = tbi.rm.getInternalId(ir); DEBUG_PRINTF("puffette event q%u t%u\n", queue, event); diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 89dce9810..73f5940ba 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -41,7 +41,6 @@ #include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" #include "util/dump_charclass.h" -#include "util/internal_report.h" #include "util/multibit_internal.h" #include "util/multibit.h" @@ -192,20 +191,20 @@ void dumpJumpTable(ofstream &os, const RoseEngine *t, } static -void dumpReport(ofstream &os, const RoseEngine *t, ReportID report) { - const auto *ir = - (const internal_report *)loadFromByteCodeOffset(t, t->intReportOffset) + - report; - os << " type=" << u32{ir->type}; - os << ", onmatch=" << ir->onmatch; - if (ir->ekey != INVALID_EKEY) { - os << ", ekey=" << ir->ekey; - } - if (ir->dkey != MO_INVALID_IDX) { - os << ", dkey=" << ir->dkey; +void dumpSomOperation(ofstream &os, const som_operation &op) { + os << " som (type=" << u32{op.type} << ", onmatch=" << op.onmatch; + switch (op.type) { + case SOM_EXTERNAL_CALLBACK_REV_NFA: + case SOM_INTERNAL_LOC_SET_REV_NFA: + case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET: + case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE: + os << ", revNfaIndex=" << op.aux.revNfaIndex; + break; + default: + os << ", somDistance=" << op.aux.somDistance; + break; } - - os << endl; + os << ")" << endl; } static @@ -314,6 +313,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(CATCH_UP) {} PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP_MPV) {} + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { os << " distance " << ri->distance << endl; } @@ -326,8 +328,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SOM_FROM_REPORT) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + dumpSomOperation(os, ri->som); } PROGRAM_NEXT_INSTRUCTION @@ -348,64 +349,69 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + os << " quash_som " << u32{ri->quash_som} << endl; + os << " dkey " << ri->dkey << endl; + os << " offset_adjust " << ri->offset_adjust << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE_SOM) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + os << " quash_som " << u32{ri->quash_som} << endl; + os << " dkey " << ri->dkey << endl; + os 
<< " offset_adjust " << ri->offset_adjust << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_CHAIN) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + os << " event " << ri->event << endl; + os << " top_squash_distance " << ri->top_squash_distance + << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_INT) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + dumpSomOperation(os, ri->som); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_AWARE) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + dumpSomOperation(os, ri->som); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_EXHAUST) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " ekey " << ri->ekey << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_EXHAUST) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " ekey " << ri->ekey << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE_AND_REPORT) { - os << " report " << ri->report << endl; - dumpReport(os, t, ri->report); + os << " quash_som " << u32{ri->quash_som} << endl; + os << " dkey " << ri->dkey << endl; + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -537,6 +543,30 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { os.close(); } +static +void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const u32 *programs = + (const u32 *)loadFromByteCodeOffset(t, t->reportProgramOffset); + + for (u32 i = 0; i < t->reportProgramCount; i++) { + os << "Report " << i << endl; + os << "---------------" << endl; + + if (programs[i]) { + os << "Program @ " << programs[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, programs[i]); + dumpProgram(os, t, prog); + } else { + os << "" << endl; + } + } + + os.close(); +} + static void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) { const u32 qindex = n->queueIndex; @@ -834,9 +864,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { if (t->hasSom) { fprintf(f, " hasSom"); } - if (t->simpleCallback) { - fprintf(f, " simpleCallback"); - } fprintf(f, "\n"); fprintf(f, "dkey count : %u\n", t->dkeyCount); @@ -949,7 +976,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U8(t, canExhaust); DUMP_U8(t, hasSom); DUMP_U8(t, somHorizon); - DUMP_U8(t, simpleCallback); DUMP_U8(t, needsCatchup); DUMP_U32(t, mode); DUMP_U32(t, historyRequired); @@ -972,10 +998,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, eodmatcherMinWidth); DUMP_U32(t, amatcherMaxBiAnchoredWidth); DUMP_U32(t, fmatcherMaxBiAnchoredWidth); - 
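/* Illustrative excerpt of the resulting rose_report_programs.txt (offsets
 * made up, instruction listing abbreviated); report 1 has no program, so the
 * "<no program>" branch above is taken:
 *
 *   Report 0
 *   ---------------
 *   Program @ 1664:
 *       ... CATCH_UP_MPV / REPORT instructions, then END ...
 *   Report 1
 *   ---------------
 *   <no program>
 */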
DUMP_U32(t, intReportOffset); - DUMP_U32(t, intReportCount); DUMP_U32(t, litProgramOffset); DUMP_U32(t, litDelayRebuildProgramOffset); + DUMP_U32(t, reportProgramOffset); + DUMP_U32(t, reportProgramCount); DUMP_U32(t, literalCount); DUMP_U32(t, activeArrayCount); DUMP_U32(t, activeLeftCount); @@ -1051,6 +1077,7 @@ void roseDumpComponents(const RoseEngine *t, bool dump_raw, dumpRevNfas(t, dump_raw, base); dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt"); dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); + dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); } void roseDumpInternals(const RoseEngine *t, const string &base) { diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 37ff9168a..bbe0b1b61 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -305,8 +305,6 @@ struct RoseEngine { u8 hasSom; /**< has at least one pattern which tracks SOM. */ u8 somHorizon; /**< width in bytes of SOM offset storage (governed by SOM precision) */ - u8 simpleCallback; /**< has only external reports with no bounds checks, - plus no exhaustion keys */ u8 needsCatchup; /** catch up needs to be run on every report. */ u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */ u32 historyRequired; /**< max amount of history required for streaming */ @@ -343,9 +341,6 @@ struct RoseEngine { u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still * produce a match for a pattern involved * with the anchored table. */ - u32 intReportOffset; /**< offset of array of internal_report structures */ - u32 intReportCount; /**< number of internal_report structures */ - /** \brief Offset of u32 array of program offsets for literals. */ u32 litProgramOffset; @@ -353,6 +348,12 @@ struct RoseEngine { * literals. */ u32 litDelayRebuildProgramOffset; + /** \brief Offset of u32 array of program offsets for internal reports. */ + u32 reportProgramOffset; + + /** \brief Number of programs for internal reports. */ + u32 reportProgramCount; + /** * \brief Number of entries in the arrays pointed to by litProgramOffset, * litDelayRebuildProgramOffset. diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 7ac0360b5..834e997f7 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -33,6 +33,7 @@ #ifndef ROSE_ROSE_PROGRAM_H #define ROSE_ROSE_PROGRAM_H +#include "som/som_operation.h" #include "rose_internal.h" #include "ue2common.h" @@ -53,9 +54,10 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state. ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. + ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV. ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. - ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from an internal_report. + ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from a som_operation. ROSE_INSTR_SOM_ZERO, //!< Set SOM to zero. ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. @@ -171,6 +173,10 @@ struct ROSE_STRUCT_CATCH_UP { u8 code; //!< From enum RoseInstructionCode. }; +struct ROSE_STRUCT_CATCH_UP_MPV { + u8 code; //!< From enum RoseInstructionCode. +}; + struct ROSE_STRUCT_SOM_ADJUST { u8 code; //!< From enum RoseInstructionCode. u32 distance; //!< Distance to EOM. 
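/* Illustrative sketch (an assumption: the patch's real makeCatchupMpv is not
 * shown in this excerpt, and it also avoids emitting catch-up for reports
 * fired by the MPV itself): the new CATCH_UP_MPV opcode is placed at the
 * head of a report program only when the database has an MPV that can fall
 * behind at report time. */
static
void makeCatchupMpvSketch(const build_context &bc,
                          std::vector<RoseInstruction> &program) {
    if (!bc.needs_mpv_catchup) {
        return; /* no MPV, or it can never be behind when we report */
    }
    program.emplace_back(ROSE_INSTR_CATCH_UP_MPV);
}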
@@ -184,7 +190,7 @@ struct ROSE_STRUCT_SOM_LEFTFIX { struct ROSE_STRUCT_SOM_FROM_REPORT { u8 code; //!< From enum RoseInstructionCode. - ReportID report; //!< EXTERNAL_CALLBACK_SOM_* report to use. + struct som_operation som; }; struct ROSE_STRUCT_SOM_ZERO { @@ -206,41 +212,49 @@ struct ROSE_STRUCT_TRIGGER_SUFFIX { struct ROSE_STRUCT_DEDUPE { u8 code; //!< From enum RoseInstructionCode. - ReportID report; + u8 quash_som; //!< Force SOM to zero for this report. + u32 dkey; //!< Dedupe key. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. u32 fail_jump; //!< Jump forward this many bytes on failure. }; struct ROSE_STRUCT_DEDUPE_SOM { u8 code; //!< From enum RoseInstructionCode. - ReportID report; + u8 quash_som; //!< Force SOM to zero for this report. + u32 dkey; //!< Dedupe key. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. u32 fail_jump; //!< Jump forward this many bytes on failure. }; struct ROSE_STRUCT_REPORT_CHAIN { u8 code; //!< From enum RoseInstructionCode. - ReportID report; + u32 event; //!< Queue event, from MQE_*. Must be a top. + + /** + * \brief Number of bytes behind us that we are allowed to squash + * identical top events on the queue. + */ + u64a top_squash_distance; }; struct ROSE_STRUCT_REPORT_SOM_INT { u8 code; //!< From enum RoseInstructionCode. - ReportID report; + struct som_operation som; }; struct ROSE_STRUCT_REPORT_SOM_AWARE { u8 code; //!< From enum RoseInstructionCode. - ReportID report; + struct som_operation som; }; struct ROSE_STRUCT_REPORT { u8 code; //!< From enum RoseInstructionCode. - ReportID report; //!< Internal report ID (used for assertions). ReportID onmatch; //!< Report ID to deliver to user. s32 offset_adjust; //!< Offset adjustment to apply to end offset. }; struct ROSE_STRUCT_REPORT_EXHAUST { u8 code; //!< From enum RoseInstructionCode. - ReportID report; //!< Internal report ID (used for assertions). ReportID onmatch; //!< Report ID to deliver to user. s32 offset_adjust; //!< Offset adjustment to apply to end offset. u32 ekey; //!< Exhaustion key. @@ -248,22 +262,23 @@ struct ROSE_STRUCT_REPORT_EXHAUST { struct ROSE_STRUCT_REPORT_SOM { u8 code; //!< From enum RoseInstructionCode. - ReportID report; + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. }; struct ROSE_STRUCT_REPORT_SOM_EXHAUST { u8 code; //!< From enum RoseInstructionCode. - ReportID report; -}; - -struct ROSE_STRUCT_REPORT_SOM_EXT { - u8 code; //!< From enum RoseInstructionCode. - ReportID report; + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. + u32 ekey; //!< Exhaustion key. }; struct ROSE_STRUCT_DEDUPE_AND_REPORT { u8 code; //!< From enum RoseInstructionCode. - ReportID report; + u8 quash_som; //!< Force SOM to zero for this report. + u32 dkey; //!< Dedupe key. + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. u32 fail_jump; //!< Jump forward this many bytes on failure. 
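/* Illustrative build-side sketch (an assumption: the patch's real makeDedupe
 * helper is not shown in this excerpt, and it also records the on-failure
 * jump target that becomes fail_jump): report properties are copied straight
 * into the instruction, so the runtime no longer needs an internal_report
 * lookup to deduplicate. */
static
void makeDedupeSketch(const RoseBuildImpl &build, const Report &report,
                      std::vector<RoseInstruction> &block) {
    block.emplace_back(ROSE_INSTR_DEDUPE);
    auto &ri = block.back();
    ri.u.dedupe.quash_som = report.quashSom ? 1 : 0;
    ri.u.dedupe.dkey = build.rm.getDkey(report); /* dkeys owned by ReportManager */
    ri.u.dedupe.offset_adjust = report.offsetAdjust;
}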
}; diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 2716c3fa2..d4309bfb8 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -33,10 +33,9 @@ #ifndef ROSE_RUNTIME_H #define ROSE_RUNTIME_H -#include "scratch.h" #include "rose_internal.h" +#include "scratch.h" #include "util/exhaust.h" // for isExhausted -#include "util/internal_report.h" #include "util/partial_store.h" /* @@ -147,16 +146,6 @@ u32 has_chained_nfas(const struct RoseEngine *t) { return t->outfixBeginQueue; } -/** \brief Fetch \ref internal_report structure for this internal ID. */ -static really_inline -const struct internal_report *getInternalReport(const struct RoseEngine *t, - ReportID intId) { - const struct internal_report *reports = - (const struct internal_report *)((const u8 *)t + t->intReportOffset); - assert(intId < t->intReportCount); - return reports + intId; -} - static really_inline void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) { DEBUG_PRINTF("match @%llu, last match @%llu\n", offset, diff --git a/src/rose/stream.c b/src/rose/stream.c index 31a0227f5..b08fe04d6 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -422,8 +422,7 @@ void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable, assert(!can_stop_matching(scratch)); } -void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, - RoseCallback callback, RoseCallbackSom som_callback) { +void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { DEBUG_PRINTF("OH HAI\n"); assert(t); assert(scratch->core_info.hbuf); @@ -457,8 +456,6 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch, tctxt->delayLastEndOffset = offset; tctxt->lastEndOffset = offset; tctxt->filledDelayedSlots = 0; - tctxt->cb = callback; - tctxt->cb_som = som_callback; tctxt->lastMatchOffset = 0; tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; diff --git a/src/runtime.c b/src/runtime.c index 6bc601412..cab612271 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -139,6 +139,12 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, s->som_set_now_offset = ~0ULL; s->deduper.current_report_offset = ~0ULL; s->deduper.som_log_dirty = 1; /* som logs have not been cleared */ + + // Rose program execution (used for some report paths) depends on these + // values being initialised. + s->tctxt.lastMatchOffset = 0; + s->tctxt.minMatchOffset = offset; + s->tctxt.minNonMpvMatchOffset = offset; } #define STATUS_VALID_BITS \ @@ -159,107 +165,6 @@ void setStreamStatus(char *state, u8 status) { *(u8 *)state = status; } -static -int roseAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { - return roseAdaptor_i(offset, id, scratch, 0, 0); -} - -static -int roseSimpleAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { - return roseAdaptor_i(offset, id, scratch, 1, 0); -} - -static -int roseSomAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { - return roseAdaptor_i(offset, id, scratch, 0, 1); -} - -static -int roseSimpleSomAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) { - return roseAdaptor_i(offset, id, scratch, 1, 1); -} - -static really_inline -RoseCallback selectAdaptor(const struct RoseEngine *rose) { - const char is_simple = rose->simpleCallback; - const char do_som = rose->hasSom; - - if (do_som) { - return is_simple ? roseSimpleSomAdaptor : roseSomAdaptor; - } else { - return is_simple ? 
roseSimpleAdaptor : roseAdaptor; - } -} - -static -int roseSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id, - struct hs_scratch *scratch) { - return roseSomAdaptor_i(from_offset, to_offset, id, scratch, 0); -} - -static -int roseSimpleSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id, - struct hs_scratch *scratch) { - return roseSomAdaptor_i(from_offset, to_offset, id, scratch, 1); -} - -static really_inline -RoseCallbackSom selectSomAdaptor(const struct RoseEngine *rose) { - const char is_simple = rose->simpleCallback; - - return is_simple ? roseSimpleSomSomAdaptor : roseSomSomAdaptor; -} - -static -int outfixSimpleSomAdaptor(u64a offset, ReportID id, void *context) { - return roseAdaptor_i(offset, id, context, 1, 1); -} - -static -int outfixSimpleAdaptor(u64a offset, ReportID id, void *context) { - return roseAdaptor_i(offset, id, context, 1, 0); -} - -static -int outfixSomAdaptor(u64a offset, ReportID id, void *context) { - return roseAdaptor_i(offset, id, context, 0, 1); -} - -static -int outfixAdaptor(u64a offset, ReportID id, void *context) { - return roseAdaptor_i(offset, id, context, 0, 0); -} - -static really_inline -NfaCallback selectOutfixAdaptor(const struct RoseEngine *rose) { - const char is_simple = rose->simpleCallback; - const char do_som = rose->hasSom; - - if (do_som) { - return is_simple ? outfixSimpleSomAdaptor : outfixSomAdaptor; - } else { - return is_simple ? outfixSimpleAdaptor : outfixAdaptor; - } -} - -static -int outfixSimpleSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id, - void *context) { - return roseSomAdaptor_i(from_offset, to_offset, id, context, 1); -} - -static -int outfixSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id, - void *context) { - return roseSomAdaptor_i(from_offset, to_offset, id, context, 0); -} - -static really_inline -SomNfaCallback selectOutfixSomAdaptor(const struct RoseEngine *rose) { - const char is_simple = rose->simpleCallback; - return is_simple ? outfixSimpleSomSomAdaptor : outfixSomSomAdaptor; -} - /** \brief Initialise SOM state. Used in both block and streaming mode. 
*/ static really_inline void initSomState(const struct RoseEngine *rose, char *state) { @@ -278,8 +183,7 @@ void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) { DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len); - roseBlockExec(rose, scratch, selectAdaptor(rose), - selectSomAdaptor(rose)); + roseBlockExec(rose, scratch); } static really_inline @@ -312,8 +216,8 @@ void initOutfixQueue(struct mq *q, u32 qi, const struct RoseEngine *t, q->length = scratch->core_info.len; q->history = scratch->core_info.hbuf; q->hlength = scratch->core_info.hlen; - q->cb = selectOutfixAdaptor(t); - q->som_cb = selectOutfixSomAdaptor(t); + q->cb = roseReportAdaptor; + q->som_cb = roseReportSomAdaptor; q->context = scratch; q->report_current = 0; @@ -376,18 +280,16 @@ void runSmallWriteEngine(const struct SmallWriteEngine *smwr, const struct NFA *nfa = getSmwrNfa(smwr); - const struct RoseEngine *rose = scratch->core_info.rose; - size_t local_alen = length - smwr->start_offset; const u8 *local_buffer = buffer + smwr->start_offset; assert(isMcClellanType(nfa->type)); if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer, - local_alen, selectOutfixAdaptor(rose), scratch); + local_alen, roseReportAdaptor, scratch); } else { nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer, - local_alen, selectOutfixAdaptor(rose), scratch); + local_alen, roseReportAdaptor, scratch); } } @@ -430,11 +332,6 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, clearEvec(rose, scratch->core_info.exhaustionVector); - // Rose program execution (used for some report paths) depends on these - // values being initialised. - scratch->tctxt.lastMatchOffset = 0; - scratch->tctxt.minMatchOffset = 0; - if (!length) { if (rose->boundary.reportZeroEodOffset) { roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0, @@ -617,8 +514,7 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { return; } - roseEodExec(rose, id->offset, scratch, selectAdaptor(rose), - selectSomAdaptor(rose)); + roseEodExec(rose, id->offset, scratch); } static never_inline @@ -676,11 +572,6 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, getHistory(state, rose, id->offset), getHistoryAmount(rose, id->offset), id->offset, status, 0); - // Rose program execution (used for some report paths) depends on these - // values being initialised. - scratch->tctxt.lastMatchOffset = 0; - scratch->tctxt.minMatchOffset = id->offset; - if (rose->somLocationCount) { loadSomFromStream(scratch, id->offset); } @@ -797,7 +688,7 @@ void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) { const struct RoseEngine *rose = stream_state->rose; assert(rose); - roseStreamExec(rose, scratch, selectAdaptor(rose), selectSomAdaptor(rose)); + roseStreamExec(rose, scratch); if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { @@ -917,11 +808,6 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, assert(scratch->core_info.hlen <= id->offset && scratch->core_info.hlen <= rose->historyRequired); - // Rose program execution (used for some report paths) depends on these - // values being initialised. 
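/* Net effect, illustratively (paraphrasing the lines changed in this diff):
 *
 *   old: q->cb = selectOutfixAdaptor(t);   // one of four, chosen per database
 *   new: q->cb = roseReportAdaptor;        // single fixed entry point
 *
 * Per-report behaviour (simple vs. SOM, exhaustion, dedupe) now lives in the
 * report program, so the simpleCallback/hasSom selection matrix can go. */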
- scratch->tctxt.lastMatchOffset = 0; - scratch->tctxt.minMatchOffset = id->offset; - prefetch_data(data, length); if (rose->somLocationCount) { diff --git a/src/scratch.h b/src/scratch.h index dc99106f9..a71dd21b0 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -119,8 +119,6 @@ struct RoseContext { * still allowed to report */ u64a next_mpv_offset; /**< earliest offset that the MPV can next report a * match, cleared if top events arrive */ - RoseCallback cb; - RoseCallbackSom cb_som; u32 filledDelayedSlots; u32 curr_qi; /**< currently executing main queue index during * \ref nfaQueueExec */ diff --git a/src/som/som_operation.h b/src/som/som_operation.h new file mode 100644 index 000000000..d85ad2268 --- /dev/null +++ b/src/som/som_operation.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief SOM runtime: data structures. + * + * Data structures used for SOM operations. + */ + +#ifndef SOM_OPERATION_H +#define SOM_OPERATION_H + +#include "ue2common.h" + +#define SOM_EXTERNAL_CALLBACK_REL 1 +#define SOM_INTERNAL_LOC_SET 2 +#define SOM_INTERNAL_LOC_SET_IF_UNSET 3 +#define SOM_INTERNAL_LOC_SET_IF_WRITABLE 4 +#define SOM_INTERNAL_LOC_SET_REV_NFA 5 +#define SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET 6 +#define SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE 7 +#define SOM_INTERNAL_LOC_COPY 8 +#define SOM_INTERNAL_LOC_COPY_IF_WRITABLE 9 +#define SOM_INTERNAL_LOC_MAKE_WRITABLE 10 +#define SOM_EXTERNAL_CALLBACK_STORED 11 +#define SOM_EXTERNAL_CALLBACK_ABS 12 +#define SOM_EXTERNAL_CALLBACK_REV_NFA 13 +#define SOM_INTERNAL_LOC_SET_FROM 14 +#define SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE 15 + +struct som_operation { + /** \brief Report type, from the definitions above. */ + u8 type; + + /* \brief SOM loc to modify. */ + u32 onmatch; + + union { + /** \brief SOM distance value, use varies according to type. + * + * - for SOM_EXTERNAL_CALLBACK_REL, from-offset is this many bytes + * before the to-offset. + * - for SOM_EXTERNAL_CALLBACK_ABS, set from-offset to this value. 
+ * - for SOM_INTERNAL_LOC_COPY*, som location read_from. + */ + u64a somDistance; + + /** \brief Index of the reverse nfa. + * + * Used by SOM_EXTERNAL_CALLBACK_REV_NFA and + * SOM_INTERNAL_LOC_SET_REV_NFA*. + */ + u64a revNfaIndex; + } aux; +}; + +#endif // SOM_OPERATION_H + diff --git a/src/som/som_runtime.c b/src/som/som_runtime.c index fa9965157..9d0a1390e 100644 --- a/src/som/som_runtime.c +++ b/src/som/som_runtime.c @@ -39,6 +39,7 @@ */ #include "hs_internal.h" +#include "som_operation.h" #include "som_runtime.h" #include "scratch.h" #include "ue2common.h" @@ -47,11 +48,10 @@ #include "nfa/nfa_internal.h" #include "util/fatbit.h" #include "util/multibit.h" -#include "util/internal_report.h" static really_inline void setSomLoc(struct fatbit *som_set_now, u64a *som_store, u32 som_store_count, - const struct internal_report *ri, u64a to_offset) { + const struct som_operation *ri, u64a to_offset) { /* validity handled by callers */ assert(to_offset >= ri->aux.somDistance); u64a start_offset = to_offset - ri->aux.somDistance; @@ -112,7 +112,7 @@ const struct NFA *getSomRevNFA(const struct RoseEngine *t, u32 i) { } static -void runRevNfa(struct hs_scratch *scratch, const struct internal_report *ri, +void runRevNfa(struct hs_scratch *scratch, const struct som_operation *ri, const u64a to_offset, u64a *from_offset) { struct core_info *ci = &scratch->core_info; @@ -159,7 +159,7 @@ void runRevNfa(struct hs_scratch *scratch, const struct internal_report *ri, static really_inline void setSomLocRevNfa(struct hs_scratch *scratch, struct fatbit *som_set_now, u64a *som_store, u32 som_store_count, - const struct internal_report *ri, u64a to_offset) { + const struct som_operation *ri, u64a to_offset) { /* validity handled by callers */ u64a from_offset = 0; runRevNfa(scratch, ri, to_offset, &from_offset); @@ -178,7 +178,7 @@ void setSomLocRevNfa(struct hs_scratch *scratch, struct fatbit *som_set_now, } void handleSomInternal(struct hs_scratch *scratch, - const struct internal_report *ri, const u64a to_offset) { + const struct som_operation *ri, const u64a to_offset) { assert(scratch); assert(ri); DEBUG_PRINTF("-->som action required at %llu\n", to_offset); @@ -209,21 +209,21 @@ void handleSomInternal(struct hs_scratch *scratch, } switch (ri->type) { - case INTERNAL_SOM_LOC_SET: - DEBUG_PRINTF("INTERNAL_SOM_LOC_SET\n"); + case SOM_INTERNAL_LOC_SET: + DEBUG_PRINTF("SOM_INTERNAL_LOC_SET\n"); mmbit_set(som_store_valid, som_store_count, ri->onmatch); setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset); return; - case INTERNAL_SOM_LOC_SET_IF_UNSET: - DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_IF_UNSET\n"); + case SOM_INTERNAL_LOC_SET_IF_UNSET: + DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_IF_UNSET\n"); if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count, ri->onmatch)) { setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset); } return; - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: { + case SOM_INTERNAL_LOC_SET_IF_WRITABLE: { u32 slot = ri->onmatch; - DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_IF_WRITABLE\n"); + DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_IF_WRITABLE\n"); if (ok_and_mark_if_write(som_store_valid, som_set_now, som_store_writable, som_store_count, slot)) { setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset); @@ -245,23 +245,23 @@ void handleSomInternal(struct hs_scratch *scratch, } return; } - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_SOM_REV_NFA\n"); + case SOM_INTERNAL_LOC_SET_REV_NFA: + DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_REV_NFA\n"); 
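/* Illustrative example (values made up): encoding "SOM lies 8 bytes before
 * EOM" for an external SOM report. For a match ending at offset 100,
 * handleSomExternal() below computes 100 - 8 == 92 from this operation; with
 * SOM_EXTERNAL_CALLBACK_ABS it would return the stored value itself. */
static const struct som_operation example_rel = {
    .type = SOM_EXTERNAL_CALLBACK_REL,
    .onmatch = 0, /* not consulted by the REL calculation */
    .aux = { .somDistance = 8 },
};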
mmbit_set(som_store_valid, som_store_count, ri->onmatch); setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count, ri, to_offset); return; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET\n"); + case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET: + DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET\n"); if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count, ri->onmatch)) { setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count, ri, to_offset); } return; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: { + case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE: { u32 slot = ri->onmatch; - DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_IF_WRITABLE\n"); + DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_IF_WRITABLE\n"); if (ok_and_mark_if_write(som_store_valid, som_set_now, som_store_writable, som_store_count, slot)) { setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count, @@ -285,10 +285,10 @@ void handleSomInternal(struct hs_scratch *scratch, } return; } - case INTERNAL_SOM_LOC_COPY: { + case SOM_INTERNAL_LOC_COPY: { u32 slot_in = ri->aux.somDistance; u32 slot_out = ri->onmatch; - DEBUG_PRINTF("INTERNAL_SOM_LOC_COPY S[%u] = S[%u]\n", slot_out, + DEBUG_PRINTF("SOM_INTERNAL_LOC_COPY S[%u] = S[%u]\n", slot_out, slot_in); assert(mmbit_isset(som_store_valid, som_store_count, slot_in)); mmbit_set(som_store_valid, som_store_count, slot_out); @@ -297,10 +297,10 @@ void handleSomInternal(struct hs_scratch *scratch, return; } - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: { + case SOM_INTERNAL_LOC_COPY_IF_WRITABLE: { u32 slot_in = ri->aux.somDistance; u32 slot_out = ri->onmatch; - DEBUG_PRINTF("INTERNAL_SOM_LOC_COPY_IF_WRITABLE S[%u] = S[%u]\n", + DEBUG_PRINTF("SOM_INTERNAL_LOC_COPY_IF_WRITABLE S[%u] = S[%u]\n", slot_out, slot_in); assert(mmbit_isset(som_store_valid, som_store_count, slot_in)); if (ok_and_mark_if_write(som_store_valid, som_set_now, @@ -322,9 +322,9 @@ void handleSomInternal(struct hs_scratch *scratch, } return; } - case INTERNAL_SOM_LOC_MAKE_WRITABLE: { + case SOM_INTERNAL_LOC_MAKE_WRITABLE: { u32 slot = ri->onmatch; - DEBUG_PRINTF("INTERNAL_SOM_LOC_MAKE_WRITABLE\n"); + DEBUG_PRINTF("SOM_INTERNAL_LOC_MAKE_WRITABLE\n"); /* if just written to the loc, ignore the racing escape */ if (fatbit_isset(som_set_now, som_store_count, slot)) { DEBUG_PRINTF("just written\n"); @@ -347,14 +347,14 @@ void handleSomInternal(struct hs_scratch *scratch, break; } - // All valid internal_report types should be handled and returned above. + // All valid som_operation types should be handled and returned above. assert(0); return; } // Returns the SOM offset. 
u64a handleSomExternal(struct hs_scratch *scratch, - const struct internal_report *ri, + const struct som_operation *ri, const u64a to_offset) { assert(scratch); assert(ri); @@ -368,20 +368,20 @@ u64a handleSomExternal(struct hs_scratch *scratch, assert(rose->hasSom); switch (ri->type) { - case EXTERNAL_CALLBACK_SOM_REL: - DEBUG_PRINTF("EXTERNAL_CALLBACK_SOM_REL: som is %llu chars back\n", + case SOM_EXTERNAL_CALLBACK_REL: + DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_REL: som is %llu chars back\n", ri->aux.somDistance); assert(to_offset >= ri->aux.somDistance); return to_offset - ri->aux.somDistance; - case EXTERNAL_CALLBACK_SOM_ABS: - DEBUG_PRINTF("EXTERNAL_CALLBACK_SOM_ABS: som is at %llu\n", + case SOM_EXTERNAL_CALLBACK_ABS: + DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_ABS: som is at %llu\n", ri->aux.somDistance); assert(to_offset >= ri->aux.somDistance); return ri->aux.somDistance; - case EXTERNAL_CALLBACK_SOM_STORED: { + case SOM_EXTERNAL_CALLBACK_STORED: { const u64a *som_store = scratch->som_store; u32 slot = ri->aux.somDistance; - DEBUG_PRINTF("EXTERNAL_CALLBACK_SOM_STORED: <- som_store[%u]=%llu\n", + DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_STORED: <- som_store[%u]=%llu\n", slot, som_store[slot]); UNUSED const u32 som_store_count = rose->somLocationCount; @@ -391,8 +391,8 @@ u64a handleSomExternal(struct hs_scratch *scratch, assert(mmbit_isset(som_store_valid, som_store_count, slot)); return som_store[slot]; } - case EXTERNAL_CALLBACK_SOM_REV_NFA: { - DEBUG_PRINTF("EXTERNAL_CALLBACK_REV_NFA\n"); + case SOM_EXTERNAL_CALLBACK_REV_NFA: { + DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_REV_NFA\n"); u64a from_offset = 0; runRevNfa(scratch, ri, to_offset, &from_offset); return from_offset; @@ -402,19 +402,19 @@ u64a handleSomExternal(struct hs_scratch *scratch, break; } - // All valid internal_report types should be handled and returned above. + // All valid som_operation types should be handled and returned above. assert(0); return 0; } void setSomFromSomAware(struct hs_scratch *scratch, - const struct internal_report *ri, u64a from_offset, + const struct som_operation *ri, u64a from_offset, u64a to_offset) { assert(scratch); assert(ri); assert(to_offset); - assert(ri->type == INTERNAL_SOM_LOC_SET_FROM - || ri->type == INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE); + assert(ri->type == SOM_INTERNAL_LOC_SET_FROM + || ri->type == SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE); struct core_info *ci = &scratch->core_info; const struct RoseEngine *rose = ci->rose; @@ -435,12 +435,12 @@ void setSomFromSomAware(struct hs_scratch *scratch, scratch->som_set_now_offset = to_offset; } - if (ri->type == INTERNAL_SOM_LOC_SET_FROM) { - DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_FROM\n"); + if (ri->type == SOM_INTERNAL_LOC_SET_FROM) { + DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_FROM\n"); mmbit_set(som_store_valid, som_store_count, ri->onmatch); setSomLoc(som_set_now, som_store, som_store_count, ri, from_offset); } else { - DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE\n"); + DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE\n"); if (ok_and_mark_if_write(som_store_valid, som_set_now, som_store_writable, som_store_count, ri->onmatch)) { diff --git a/src/som/som_runtime.h b/src/som/som_runtime.h index a0baa7633..30c7ace8c 100644 --- a/src/som/som_runtime.h +++ b/src/som/som_runtime.h @@ -26,10 +26,10 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief SOM runtime code. * - * * Runtime code for SOM handling called by the Rose callback adaptors. 
*/ @@ -39,17 +39,17 @@ #include "scratch.h" #include "ue2common.h" -struct internal_report; +struct som_operation; void handleSomInternal(struct hs_scratch *scratch, - const struct internal_report *ri, const u64a to_offset); + const struct som_operation *ri, const u64a to_offset); // Returns the from_offset. u64a handleSomExternal(struct hs_scratch *scratch, - const struct internal_report *ri, const u64a to_offset); + const struct som_operation *ri, const u64a to_offset); void setSomFromSomAware(struct hs_scratch *scratch, - const struct internal_report *ri, u64a from_offset, + const struct som_operation *ri, u64a from_offset, u64a to_offset); int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset); diff --git a/src/util/exhaust.h b/src/util/exhaust.h index f7b7d6e95..b55c52d77 100644 --- a/src/util/exhaust.h +++ b/src/util/exhaust.h @@ -34,10 +34,12 @@ #define EXHAUST_H #include "rose/rose_internal.h" -#include "util/internal_report.h" #include "util/multibit.h" #include "ue2common.h" +/** Index meaning a given exhaustion key is invalid. */ +#define INVALID_EKEY (~(u32)0) + /** \brief Test whether the given key (\a ekey) is set in the exhaustion vector * \a evec. */ static really_inline diff --git a/src/util/internal_report.h b/src/util/internal_report.h deleted file mode 100644 index 1ed44ba21..000000000 --- a/src/util/internal_report.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Definition of an internal_report, along with convenience functions. 
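/* Note: ~(u32)0 == 0xffffffff, so the INVALID_EKEY relocated to
 * util/exhaust.h above is bit-identical to the constant this deleted header
 * used to define. */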
- */ - -#ifndef INTERNAL_REPORT_H -#define INTERNAL_REPORT_H - -#include "ue2common.h" - -/* internal_report::type values */ - -#define EXTERNAL_CALLBACK 0 -#define EXTERNAL_CALLBACK_SOM_REL 1 -#define INTERNAL_SOM_LOC_SET 2 -#define INTERNAL_SOM_LOC_SET_IF_UNSET 3 -#define INTERNAL_SOM_LOC_SET_IF_WRITABLE 4 -#define INTERNAL_SOM_LOC_SET_SOM_REV_NFA 5 -#define INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET 6 -#define INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE 7 -#define INTERNAL_SOM_LOC_COPY 8 -#define INTERNAL_SOM_LOC_COPY_IF_WRITABLE 9 -#define INTERNAL_SOM_LOC_MAKE_WRITABLE 10 -#define EXTERNAL_CALLBACK_SOM_STORED 11 -#define EXTERNAL_CALLBACK_SOM_ABS 12 -#define EXTERNAL_CALLBACK_SOM_REV_NFA 13 - -/** set the som loc to the value in from_offset */ -#define INTERNAL_SOM_LOC_SET_FROM 14 - -/** set the som loc to the value in from_offset */ -#define INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE 15 -#define INTERNAL_ROSE_CHAIN 16 - -/** Index meaning a given exhaustion key is invalid. */ -#define INVALID_EKEY 0xffffffff - -/** \brief All the data we use for handling a match, bytecode representation. - * - * Includes extparam constraints and bounds, exhaustion/dedupe keys, offset - * adjustment and SOM information. - * - * At compile time, this data is in the ue2::Report structure, which is - * converted to internal_report for layout in the bytecode. - */ -struct ALIGN_CL_DIRECTIVE internal_report { - /** \brief from EXTERNAL_ and INTERNAL_ defined above. */ - u8 type; - - /** \brief do we actually use minOffset, maxOffset */ - u8 hasBounds; - - /** \brief use SOM for minLength, but don't report it to user callback. */ - u8 quashSom; - - /** \brief min offset in the stream at which this report can match. */ - u64a minOffset; - - /** \brief max offset in the stream at which this report can match. */ - u64a maxOffset; - - /** \brief min match length (start of match to current offset) */ - u64a minLength; - - /** \brief Exhaustion key. - * - * If exhaustible, the ekey to check before reporting a match. - * Additionally after reporting a match the ekey will be set. If not - * exhaustible, this will be INVALID_EKEY. */ - u32 ekey; - - /** \brief Dedupe key. */ - u32 dkey; - - /** \brief Adjustment to add to the match offset when we report a match. - * - * This is usually used for reports attached to states that form part of a - * zero-width assertion, like '$'. */ - s32 offsetAdjust; - - /** \brief Match report ID, for external reports. - * - * - external callback -> external report id - * - internal_som_* -> som loc to modify, - * - INTERNAL_ROSE_CHAIN -> top event to push on - * - otherwise target subnfa. */ - u32 onmatch; - - union { - /** \brief SOM distance value, use varies according to type. - * - * - for EXTERNAL_CALLBACK_SOM_REL, from-offset is this many bytes - * before the to-offset. - * - for EXTERNAL_CALLBACK_SOM_ABS, set from-offset to this value. - * - for INTERNAL_SOM_LOC_COPY*, som location read_from. - */ - u64a somDistance; - - /** \brief Index of the reverse nfa. - * Used by EXTERNAL_CALLBACK_SOM_REV_NFA and - * INTERNAL_SOM_LOC_SET_SOM_REV_NFA* - */ - u64a revNfaIndex; - - /** - * Used by INTERNAL_ROSE_CHAIN, Number of bytes behind us that we are - * allowed to squash identical top events on the queue. 
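/* For orientation (summary, not part of the patch): the deleted
 * internal_report fields do not simply vanish.
 *   - type, somDistance, revNfaIndex: now struct som_operation (added above);
 *   - onmatch, offsetAdjust, ekey, dkey, quashSom: now fields of the REPORT*
 *     and DEDUPE* instructions in rose_program.h;
 *   - topSquashDistance: now ROSE_STRUCT_REPORT_CHAIN;
 *   - hasBounds, minOffset, maxOffset, minLength: now dedicated program
 *     checks such as ROSE_INSTR_CHECK_MIN_LENGTH. */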
- */ - u64a topSquashDistance; - } aux; -}; - -static really_inline -int isInternalSomReport(const struct internal_report *ri) { - switch (ri->type) { - case INTERNAL_SOM_LOC_SET: - case INTERNAL_SOM_LOC_SET_IF_UNSET: - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - case INTERNAL_SOM_LOC_COPY: - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - case INTERNAL_SOM_LOC_SET_FROM: - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - return 1; - case EXTERNAL_CALLBACK: - case EXTERNAL_CALLBACK_SOM_REL: - case EXTERNAL_CALLBACK_SOM_STORED: - case EXTERNAL_CALLBACK_SOM_ABS: - case EXTERNAL_CALLBACK_SOM_REV_NFA: - case INTERNAL_ROSE_CHAIN: - return 0; - default: - break; // fall through - } - assert(0); // unknown? - return 0; -} - -#ifndef NDEBUG -/* used in asserts */ -static UNUSED -char isExternalReport(const struct internal_report *ir) { - switch (ir->type) { - case INTERNAL_SOM_LOC_SET: - case INTERNAL_SOM_LOC_SET_IF_UNSET: - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - case INTERNAL_SOM_LOC_COPY: - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - case INTERNAL_SOM_LOC_SET_FROM: - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - case INTERNAL_ROSE_CHAIN: - return 0; - case EXTERNAL_CALLBACK: - case EXTERNAL_CALLBACK_SOM_REL: - case EXTERNAL_CALLBACK_SOM_STORED: - case EXTERNAL_CALLBACK_SOM_ABS: - case EXTERNAL_CALLBACK_SOM_REV_NFA: - return 1; - default: - break; // fall through - } - assert(0); // unknown? - return 1; -} -#endif - -#endif // INTERNAL_REPORT_H diff --git a/src/util/report.cpp b/src/util/report.cpp deleted file mode 100644 index 58f10fd45..000000000 --- a/src/util/report.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "internal_report.h" -#include "report.h" -#include "report_manager.h" - -namespace ue2 { - -void writeInternalReport(const Report &report, const ReportManager &rm, - internal_report *ir) { - assert(ir); - assert(ISALIGNED(ir)); - - ir->type = report.type; - ir->hasBounds = report.hasBounds() ? 1 : 0; - ir->quashSom = report.quashSom ? 1 : 0; - ir->minOffset = report.minOffset; - ir->maxOffset = report.maxOffset; - ir->minLength = report.minLength; - ir->ekey = report.ekey; - ir->offsetAdjust = report.offsetAdjust; - ir->onmatch = report.onmatch; - - switch (report.type) { - case INTERNAL_ROSE_CHAIN: - ir->aux.topSquashDistance = report.topSquashDistance; - break; - case EXTERNAL_CALLBACK_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - ir->aux.revNfaIndex = report.revNfaIndex; - break; - default: - ir->aux.somDistance = report.somDistance; - break; - } - - // Dedupe keys are managed by ReportManager. - ir->dkey = rm.getDkey(report); -} - -} // namespace ue2 diff --git a/src/util/report.h b/src/util/report.h index c4f3bd8c0..59a113002 100644 --- a/src/util/report.h +++ b/src/util/report.h @@ -34,31 +34,49 @@ #ifndef UTIL_REPORT_H #define UTIL_REPORT_H -#include "internal_report.h" +#include "util/exhaust.h" // for INVALID_EKEY #include "order_check.h" #include "ue2common.h" #include -struct internal_report; - namespace ue2 { class ReportManager; +enum ReportType { + EXTERNAL_CALLBACK, + EXTERNAL_CALLBACK_SOM_REL, + INTERNAL_SOM_LOC_SET, + INTERNAL_SOM_LOC_SET_IF_UNSET, + INTERNAL_SOM_LOC_SET_IF_WRITABLE, + INTERNAL_SOM_LOC_SET_SOM_REV_NFA, + INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET, + INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE, + INTERNAL_SOM_LOC_COPY, + INTERNAL_SOM_LOC_COPY_IF_WRITABLE, + INTERNAL_SOM_LOC_MAKE_WRITABLE, + EXTERNAL_CALLBACK_SOM_STORED, + EXTERNAL_CALLBACK_SOM_ABS, + EXTERNAL_CALLBACK_SOM_REV_NFA, + INTERNAL_SOM_LOC_SET_FROM, + INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE, + INTERNAL_ROSE_CHAIN, + EXTERNAL_CALLBACK_SOM_PASS +}; + /** * \brief All the data we use for handling a match. * * Includes extparam constraints and bounds, exhaustion/dedupe keys, offset * adjustment and SOM information. * - * The data in this structure is converted into an \ref internal_report in the - * bytecode. + * The data in this structure eventually becomes a list of Rose programs + * instructions. */ struct Report { - Report(u8 type_in, u32 onmatch_in) : type(type_in), onmatch(onmatch_in) { - assert(type <= INTERNAL_ROSE_CHAIN); - } + Report(ReportType type_in, u32 onmatch_in) + : type(type_in), onmatch(onmatch_in) {} /** \brief True if this report has bounds from extended parameters, i.e. * min offset, max offset, min length. */ @@ -66,8 +84,8 @@ struct Report { return minOffset > 0 || maxOffset < MAX_OFFSET || minLength > 0; } - /** \brief from EXTERNAL_ and INTERNAL_ defined in internal_report.h. */ - u8 type; + /** \brief Type of this report. */ + ReportType type; /** \brief use SOM for minLength, but don't report it to user callback. 
*/ bool quashSom = false; @@ -147,6 +165,7 @@ bool isInternalSomReport(const Report &r) { case EXTERNAL_CALLBACK_SOM_ABS: case EXTERNAL_CALLBACK_SOM_REV_NFA: case INTERNAL_ROSE_CHAIN: + case EXTERNAL_CALLBACK_SOM_PASS: return false; default: break; // fall through @@ -176,6 +195,7 @@ bool isExternalReport(const Report &r) { case EXTERNAL_CALLBACK_SOM_STORED: case EXTERNAL_CALLBACK_SOM_ABS: case EXTERNAL_CALLBACK_SOM_REV_NFA: + case EXTERNAL_CALLBACK_SOM_PASS: return true; default: break; // fall through @@ -228,7 +248,7 @@ Report makeSomRelativeCallback(u32 report, s32 offsetAdjust, u64a distance) { } static inline -Report makeRoseTrigger(u32 event, u64a squashDistance) { +Report makeMpvTrigger(u32 event, u64a squashDistance) { Report ir(INTERNAL_ROSE_CHAIN, event); ir.ekey = INVALID_EKEY; ir.topSquashDistance = squashDistance; @@ -254,22 +274,6 @@ bool isSimpleExhaustible(const Report &ir) { return true; } -/** True if this report requires some of the more esoteric processing in the - * rose adaptor, rather than just firing a callback or doing SOM handling. - */ -static inline -bool isComplexReport(const Report &ir) { - if (ir.hasBounds() || ir.ekey != INVALID_EKEY) { - return true; - } - - return false; -} - -/** \brief Write the given Report into an internal_report structure. */ -void writeInternalReport(const Report &report, const ReportManager &rm, - internal_report *ir); - } // namespace #endif // UTIL_REPORT_H From 1d85987d9685ac3f0a8b33773dba5c8a06421054 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 14 Apr 2016 10:08:36 +1000 Subject: [PATCH 150/218] FINAL_REPORT: Add specialised instruction Specialisation of the REPORT instruction that also terminates execution of the program. Improves performance on programs that generate many reports. --- src/rose/program_runtime.h | 12 ++++++++++ src/rose/rose_build_bytecode.cpp | 39 ++++++++++++++++++++++++++++++-- src/rose/rose_dump.cpp | 6 +++++ src/rose/rose_program.h | 12 ++++++++++ 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index db1dc8c16..be56bec7b 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1165,6 +1165,18 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(FINAL_REPORT) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, + INVALID_EKEY) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + /* One-shot specialisation: this instruction always terminates + * execution of the program. 
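             * Returning HWLM_CONTINUE_MATCHING tells the caller that the
             * scan as a whole may continue; only this program is finished.
             * Nothing but END can follow a FINAL_REPORT (see
             * applyFinalSpecialisation below), so skipping the remaining
             * dispatch is safe.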
*/ + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_EXHAUSTED) { DEBUG_PRINTF("check ekey %u\n", ri->ekey); assert(ri->ekey != INVALID_EKEY); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index add3ac2d8..6407f125e 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -212,6 +212,7 @@ class RoseInstruction { case ROSE_INSTR_REPORT_SOM: return &u.reportSom; case ROSE_INSTR_REPORT_SOM_EXHAUST: return &u.reportSomExhaust; case ROSE_INSTR_DEDUPE_AND_REPORT: return &u.dedupeAndReport; + case ROSE_INSTR_FINAL_REPORT: return &u.finalReport; case ROSE_INSTR_CHECK_EXHAUSTED: return &u.checkExhausted; case ROSE_INSTR_CHECK_MIN_LENGTH: return &u.checkMinLength; case ROSE_INSTR_SET_STATE: return &u.setState; @@ -257,6 +258,7 @@ class RoseInstruction { case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); case ROSE_INSTR_REPORT_SOM_EXHAUST: return sizeof(u.reportSomExhaust); case ROSE_INSTR_DEDUPE_AND_REPORT: return sizeof(u.dedupeAndReport); + case ROSE_INSTR_FINAL_REPORT: return sizeof(u.finalReport); case ROSE_INSTR_CHECK_EXHAUSTED: return sizeof(u.checkExhausted); case ROSE_INSTR_CHECK_MIN_LENGTH: return sizeof(u.checkMinLength); case ROSE_INSTR_SET_STATE: return sizeof(u.setState); @@ -301,6 +303,7 @@ class RoseInstruction { ROSE_STRUCT_REPORT_SOM reportSom; ROSE_STRUCT_REPORT_SOM_EXHAUST reportSomExhaust; ROSE_STRUCT_DEDUPE_AND_REPORT dedupeAndReport; + ROSE_STRUCT_FINAL_REPORT finalReport; ROSE_STRUCT_CHECK_EXHAUSTED checkExhausted; ROSE_STRUCT_CHECK_MIN_LENGTH checkMinLength; ROSE_STRUCT_SET_STATE setState; @@ -2160,6 +2163,31 @@ flattenProgram(const vector> &programs) { return out; } +static +void applyFinalSpecialisation(vector &program) { + assert(!program.empty()); + assert(program.back().code() == ROSE_INSTR_END); + if (program.size() < 2) { + return; + } + + /* Replace the second-to-last instruction (before END) with a one-shot + * specialisation if available. 
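     * Only REPORT currently has a one-shot form (FINAL_REPORT); the switch
     * below is structured so that further specialisations can be added.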
*/ + auto &ri = *(next(program.rbegin())); + switch (ri.code()) { + case ROSE_INSTR_REPORT: { + DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); + auto ri2 = RoseInstruction(ROSE_INSTR_FINAL_REPORT); + ri2.u.finalReport.onmatch = ri.u.report.onmatch; + ri2.u.finalReport.offset_adjust = ri.u.report.offset_adjust; + ri = ri2; + break; + } + default: + break; + } +} + static void recordResources(RoseResources &resources, const vector &program) { @@ -3020,7 +3048,9 @@ u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc, for (const auto &id : reports) { makeReport(build, id, has_som, program); } - return writeProgram(bc, flattenProgram({program})); + program = flattenProgram({program}); + applyFinalSpecialisation(program); + return writeProgram(bc, program); } static @@ -3374,6 +3404,7 @@ pair makeSparseIterProgram(build_context &bc, program.insert(end(program), begin(root_program), end(root_program)); } + applyFinalSpecialisation(program); return {writeProgram(bc, program), iter_offset}; } @@ -3634,6 +3665,7 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, makePushDelayedInstructions(build, final_id, program); assert(!program.empty()); program = flattenProgram({program}); + applyFinalSpecialisation(program); return writeProgram(bc, program); } @@ -3714,7 +3746,9 @@ u32 buildReportPrograms(RoseBuildImpl &build, build_context &bc) { const bool has_som = false; makeCatchupMpv(build, bc, id, program); makeReport(build, id, has_som, program); - programs[id] = writeProgram(bc, flattenProgram({program})); + program = flattenProgram({program}); + applyFinalSpecialisation(program); + programs[id] = writeProgram(bc, program); DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id, programs.back(), program.size()); } @@ -3792,6 +3826,7 @@ pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { u32 iter_offset = addPredBlocks(bc, predProgramLists, program, true); assert(program.size() > 1); + applyFinalSpecialisation(program); return {writeProgram(bc, program), iter_offset}; } diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 73f5940ba..f6badd1ba 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -416,6 +416,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(FINAL_REPORT) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_EXHAUSTED) { os << " ekey " << ri->ekey << endl; os << " fail_jump " << offset + ri->fail_jump << endl; diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 834e997f7..01572dbd0 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -82,6 +82,12 @@ enum RoseInstructionCode { /** \brief Super-instruction combining DEDUPE and REPORT. */ ROSE_INSTR_DEDUPE_AND_REPORT, + /** + * \brief Fire a report and stop program execution. This is a + * specialisation intended for short, frequently-executed programs. + */ + ROSE_INSTR_FINAL_REPORT, + ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set. ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length. ROSE_INSTR_SET_STATE, //!< Switch a state index on. @@ -282,6 +288,12 @@ struct ROSE_STRUCT_DEDUPE_AND_REPORT { u32 fail_jump; //!< Jump forward this many bytes on failure. }; +struct ROSE_STRUCT_FINAL_REPORT { + u8 code; //!< From enum RoseInstructionCode. + ReportID onmatch; //!< Report ID to deliver to user. 
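/* Illustrative effect of applyFinalSpecialisation on a leaf program (opcodes
 * only, offsets omitted):
 *
 *     before:  ... REPORT(onmatch=5)       END
 *     after:   ... FINAL_REPORT(onmatch=5) END
 *
 * END is retained but never executed on this path, since FINAL_REPORT
 * returns from the interpreter directly. */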
+ s32 offset_adjust; //!< Offset adjustment to apply to end offset. +}; + struct ROSE_STRUCT_CHECK_EXHAUSTED { u8 code; //!< From enum RoseInstructionCode. u32 ekey; //!< Exhaustion key to check. From 92976c640aeccc3a8ff741a5e4eadd64cf08d97b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 15 Apr 2016 10:45:52 +1000 Subject: [PATCH 151/218] report: remove dead function satisfiesMinLength --- src/report.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/report.h b/src/report.h index 861abb37e..d037d11b4 100644 --- a/src/report.h +++ b/src/report.h @@ -44,21 +44,6 @@ #include "util/exhaust.h" #include "util/fatbit.h" -static really_inline -int satisfiesMinLength(u64a min_len, u64a from_offset, - u64a to_offset) { - assert(min_len); - - if (from_offset == HS_OFFSET_PAST_HORIZON) { - DEBUG_PRINTF("SOM beyond horizon\n"); - return 1; - } - - DEBUG_PRINTF("match len=%llu, min len=%llu\n", to_offset - from_offset, - min_len); - return to_offset - from_offset >= min_len; -} - enum DedupeResult { DEDUPE_CONTINUE, //!< Continue with match, not a dupe. DEDUPE_SKIP, //!< Don't report this match, dupe or delayed due to SOM. From 4541eaa92cd7fa832885b9f34be038b2c12dea62 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 15 Apr 2016 10:48:09 +1000 Subject: [PATCH 152/218] report: remove dead function isInternalSomReport --- src/util/report.h | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/util/report.h b/src/util/report.h index 59a113002..24ecca9d4 100644 --- a/src/util/report.h +++ b/src/util/report.h @@ -144,36 +144,6 @@ struct Report { u64a topSquashDistance = 0; }; -static inline -bool isInternalSomReport(const Report &r) { - switch (r.type) { - case INTERNAL_SOM_LOC_SET: - case INTERNAL_SOM_LOC_SET_IF_UNSET: - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - case INTERNAL_SOM_LOC_COPY: - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - case INTERNAL_SOM_LOC_SET_FROM: - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - return true; - case EXTERNAL_CALLBACK: - case EXTERNAL_CALLBACK_SOM_REL: - case EXTERNAL_CALLBACK_SOM_STORED: - case EXTERNAL_CALLBACK_SOM_ABS: - case EXTERNAL_CALLBACK_SOM_REV_NFA: - case INTERNAL_ROSE_CHAIN: - case EXTERNAL_CALLBACK_SOM_PASS: - return false; - default: - break; // fall through - } - assert(0); // unknown? 
- return false; -} - static inline bool isExternalReport(const Report &r) { switch (r.type) { From 84daf1fc1ac30c9eb0134fe3bae0e88925874593 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 15 Apr 2016 10:56:43 +1000 Subject: [PATCH 153/218] ng_limex_accel: delete unused DAccelScheme ctor --- src/nfagraph/ng_limex_accel.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 7bad1c669..1f991f198 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -284,8 +284,6 @@ struct DAccelScheme { assert(double_offset <= MAX_ACCEL_DEPTH); } - DAccelScheme() {} - bool operator<(const DAccelScheme &b) const { const DAccelScheme &a = *this; From ac318412146f7e8a75b3b4099d1b336d2f2283c6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 18 Apr 2016 11:05:01 +1000 Subject: [PATCH 154/218] scratch: delete unused hs_scratch::qNfaState --- src/scratch.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scratch.h b/src/scratch.h index a71dd21b0..21ec809cf 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -148,7 +148,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { struct RoseContext tctxt; char *bstate; /**< block mode states */ char *tstate; /**< state for transient roses */ - char *qNfaState; /**< queued NFA temp state */ char *fullState; /**< uncompressed NFA state */ struct mq *queues; struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid From fa27025bcb01b190776d243415ff5351f4b4ddaf Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 15 Apr 2016 15:28:23 +1000 Subject: [PATCH 155/218] Wrap MPV puffettes in a struct --- src/rose/rose_build_add.cpp | 6 +++--- src/rose/rose_build_bytecode.cpp | 6 +++--- src/rose/rose_build_impl.h | 23 ++++++++++++++++------- src/rose/rose_build_misc.cpp | 8 ++++---- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 7fecaeec6..5cf45dcce 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1780,7 +1780,7 @@ bool RoseBuildImpl::addOutfix(const raw_puff &rp) { mpv_outfix->chained = true; } - mpv_outfix->puffettes.push_back(rp); + mpv_outfix->mpv.puffettes.push_back(rp); mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats)); @@ -1804,7 +1804,7 @@ bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out, mpv_outfix->chained = true; } - mpv_outfix->triggered_puffettes.push_back(rp); + mpv_outfix->mpv.triggered_puffettes.push_back(rp); mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats)); @@ -1816,7 +1816,7 @@ bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out, * the caller */ *queue_out = mpv_outfix->get_queue(qif); - *event_out = MQE_TOP_FIRST + mpv_outfix->triggered_puffettes.size() - 1; + *event_out = MQE_TOP_FIRST + mpv_outfix->mpv.triggered_puffettes.size() - 1; return true; /* failure is not yet an option */ } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 6407f125e..1cd150c09 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1294,7 +1294,7 @@ aligned_unique_ptr buildOutfix(RoseBuildImpl &tbi, OutfixInfo &outfix) { } } } - } else if (!outfix.puffettes.empty()) { + } else if (!outfix.mpv.puffettes.empty()) { assert(0); } @@ -1327,7 +1327,7 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, 
size_t *historyRequired, } assert(mpv->chained); - auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes); + auto nfa = mpvCompile(mpv->mpv.puffettes, mpv->mpv.triggered_puffettes); assert(nfa); if (!nfa) { throw CompileError("Unable to generate bytecode."); @@ -1347,7 +1347,7 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, } add_nfa_to_blob(bc, *nfa); - *mpv_as_outfix = !mpv->puffettes.empty(); + *mpv_as_outfix = !mpv->mpv.puffettes.empty(); } static diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 7b59c59e9..d2a19f78f 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -290,6 +290,18 @@ bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) { return 0; } +struct MpvProto { + bool empty() const { + return puffettes.empty() && triggered_puffettes.empty(); + } + void reset() { + puffettes.clear(); + triggered_puffettes.clear(); + } + std::vector puffettes; + std::vector triggered_puffettes; +}; + struct OutfixInfo { /* TODO: poly */ OutfixInfo() {} explicit OutfixInfo(std::unique_ptr r) : rdfa(std::move(r)) { @@ -310,28 +322,25 @@ struct OutfixInfo { /* TODO: poly */ } bool is_nonempty_mpv() const { - return !puffettes.empty() || !triggered_puffettes.empty(); + return !mpv.empty(); } bool is_dead() const { - return !holder && !rdfa && !haig && puffettes.empty() && - triggered_puffettes.empty(); + return !holder && !rdfa && !haig && mpv.empty(); } void clear() { holder.reset(); rdfa.reset(); haig.reset(); - puffettes.clear(); - triggered_puffettes.clear(); + mpv.reset(); assert(is_dead()); } std::unique_ptr holder; std::unique_ptr rdfa; std::unique_ptr haig; - std::vector puffettes; - std::vector triggered_puffettes; + MpvProto mpv; RevAccInfo rev_info; u32 maxBAWidth = 0; //!< max bi-anchored width diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index e5c5b4e62..2904e4acc 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -631,10 +631,10 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) } if (tbi.mpv_outfix) { - for (const auto &puff : tbi.mpv_outfix->puffettes) { + for (const auto &puff : tbi.mpv_outfix->mpv.puffettes) { puff_map[puff.report].insert(&puff); } - for (const auto &puff : tbi.mpv_outfix->triggered_puffettes) { + for (const auto &puff : tbi.mpv_outfix->mpv.triggered_puffettes) { puff_map[puff.report].insert(&puff); } } @@ -886,10 +886,10 @@ set all_reports(const OutfixInfo &outfix) { insert(&reports, all_reports(*outfix.haig)); } - for (const auto &puff : outfix.puffettes) { + for (const auto &puff : outfix.mpv.puffettes) { reports.insert(puff.report); } - for (const auto &puff : outfix.triggered_puffettes) { + for (const auto &puff : outfix.mpv.triggered_puffettes) { reports.insert(puff.report); } From 32c866a8f94ac128425f238c81aa6f3c174ef32f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 15 Apr 2016 16:41:35 +1000 Subject: [PATCH 156/218] OutfixInfo: use boost::variant for engines --- src/rose/rose_build_add.cpp | 14 ++++-- src/rose/rose_build_bytecode.cpp | 39 +++++++-------- src/rose/rose_build_compile.cpp | 4 +- src/rose/rose_build_impl.h | 82 +++++++++++++++++++++++--------- src/rose/rose_build_merge.cpp | 57 ++++++++++++---------- src/rose/rose_build_misc.cpp | 46 +++++++++++------- 6 files changed, 150 insertions(+), 92 deletions(-) diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 5cf45dcce..f2ef2ca9c 100644 --- a/src/rose/rose_build_add.cpp +++ 
b/src/rose/rose_build_add.cpp @@ -1776,11 +1776,13 @@ bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) { bool RoseBuildImpl::addOutfix(const raw_puff &rp) { if (!mpv_outfix) { - mpv_outfix = make_unique(); + mpv_outfix = make_unique(MpvProto()); mpv_outfix->chained = true; } - mpv_outfix->mpv.puffettes.push_back(rp); + auto *mpv = mpv_outfix->mpv(); + assert(mpv); + mpv->puffettes.push_back(rp); mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats)); @@ -1800,11 +1802,13 @@ bool RoseBuildImpl::addOutfix(const raw_puff &rp) { bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) { if (!mpv_outfix) { - mpv_outfix = make_unique(); + mpv_outfix = make_unique(MpvProto()); mpv_outfix->chained = true; } - mpv_outfix->mpv.triggered_puffettes.push_back(rp); + auto *mpv = mpv_outfix->mpv(); + assert(mpv); + mpv->triggered_puffettes.push_back(rp); mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats)); @@ -1816,7 +1820,7 @@ bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out, * the caller */ *queue_out = mpv_outfix->get_queue(qif); - *event_out = MQE_TOP_FIRST + mpv_outfix->mpv.triggered_puffettes.size() - 1; + *event_out = MQE_TOP_FIRST + mpv->triggered_puffettes.size() - 1; return true; /* failure is not yet an option */ } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 1cd150c09..6c3b1844a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1264,14 +1264,14 @@ aligned_unique_ptr buildOutfix(RoseBuildImpl &tbi, OutfixInfo &outfix) { const ReportManager &rm = tbi.rm; aligned_unique_ptr n; - if (outfix.rdfa) { + if (auto *rdfa = outfix.rdfa()) { // Unleash the McClellan! - n = mcclellanCompile(*outfix.rdfa, cc); - } else if (outfix.haig) { + n = mcclellanCompile(*rdfa, cc); + } else if (auto *haig = outfix.haig()) { // Unleash the Goughfish! - n = goughCompile(*outfix.haig, tbi.ssm.somPrecision(), cc); - } else if (outfix.holder) { - NGHolder &h = *outfix.holder; + n = goughCompile(*haig, tbi.ssm.somPrecision(), cc); + } else if (auto *holder = outfix.holder()) { + NGHolder &h = *holder; assert(h.kind == NFA_OUTFIX); // Build NFA. 
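A note on the protocol the addOutfix()/addChainTail() hunks above rely on, combining patch 155's MpvProto wrapper with patch 156's pointer-returning accessor: seeding the variant with an MpvProto at construction time is what makes the unconditional assert(mpv) after the accessor safe. A minimal, self-contained sketch follows; it is not the patch's code (the element type is simplified to int rather than raw_puff, and only the MpvProto alternative is modelled):

    #include <boost/blank.hpp>
    #include <boost/variant.hpp>
    #include <cassert>
    #include <memory>
    #include <vector>

    struct MpvProto {
        std::vector<int> puffettes;           // element type simplified
        std::vector<int> triggered_puffettes;
        bool empty() const {
            return puffettes.empty() && triggered_puffettes.empty();
        }
    };

    struct OutfixInfo {
        explicit OutfixInfo(MpvProto m) : proto(std::move(m)) {}
        // Returns nullptr unless the MpvProto alternative is active.
        MpvProto *mpv() { return boost::get<MpvProto>(&proto); }
        boost::variant<boost::blank, MpvProto> proto;
    };

    int main() {
        auto mpv_outfix = std::make_unique<OutfixInfo>(MpvProto());
        MpvProto *mpv = mpv_outfix->mpv(); // non-null: MpvProto is active
        assert(mpv);
        mpv->puffettes.push_back(42);
        return 0;
    }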
@@ -1294,8 +1294,8 @@ aligned_unique_ptr buildOutfix(RoseBuildImpl &tbi, OutfixInfo &outfix) { } } } - } else if (!outfix.mpv.puffettes.empty()) { - assert(0); + } else if (auto *mpv = outfix.mpv()) { + assert(mpv->puffettes.empty()); } if (n && tbi.cc.grey.reverseAccelerate) { @@ -1310,34 +1310,36 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, bool *mpv_as_outfix) { assert(bc.engineOffsets.empty()); // MPV should be first *mpv_as_outfix = false; - OutfixInfo *mpv = nullptr; + OutfixInfo *mpv_outfix = nullptr; /* assume outfixes are just above chain tails in queue indices */ for (auto &out : tbi.outfixes) { if (out.is_nonempty_mpv()) { - assert(!mpv); - mpv = &out; + assert(!mpv_outfix); + mpv_outfix = &out; } else { assert(!out.chained); } } - if (!mpv) { + if (!mpv_outfix) { return; } - assert(mpv->chained); - auto nfa = mpvCompile(mpv->mpv.puffettes, mpv->mpv.triggered_puffettes); + assert(mpv_outfix->chained); + auto *mpv = mpv_outfix->mpv(); + auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes); assert(nfa); if (!nfa) { throw CompileError("Unable to generate bytecode."); } if (tbi.cc.grey.reverseAccelerate) { - buildReverseAcceleration(nfa.get(), mpv->rev_info, mpv->minWidth); + buildReverseAcceleration(nfa.get(), mpv_outfix->rev_info, + mpv_outfix->minWidth); } - u32 qi = mpv->get_queue(tbi.qif); + u32 qi = mpv_outfix->get_queue(tbi.qif); nfa->queueIndex = qi; DEBUG_PRINTF("built mpv\n"); @@ -1347,7 +1349,7 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, } add_nfa_to_blob(bc, *nfa); - *mpv_as_outfix = !mpv->mpv.puffettes.empty(); + *mpv_as_outfix = !mpv->puffettes.empty(); } static @@ -1387,8 +1389,7 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, if (out.chained) { continue; /* already done */ } - DEBUG_PRINTF("building outfix %zd (holder %p rdfa %p)\n", - &out - &tbi.outfixes[0], out.holder.get(), out.rdfa.get()); + DEBUG_PRINTF("building outfix %zd\n", &out - &tbi.outfixes[0]); auto n = buildOutfix(tbi, out); if (!n) { assert(0); diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 477335caf..12500599c 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1904,8 +1904,8 @@ bool extractSEPLiterals(const OutfixInfo &outfix, const ReportManager &rm, // SEP cases should always become DFAs, so that's the only extract code we // have implemented here. 
- if (outfix.rdfa) { - return extractSEPLiterals(*outfix.rdfa, lits_out); + if (outfix.rdfa()) { + return extractSEPLiterals(*outfix.rdfa(), lits_out); } DEBUG_PRINTF("cannot extract literals from outfix type\n"); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index d2a19f78f..4b9f6f103 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -48,6 +48,7 @@ #include #include #include +#include struct RoseEngine; @@ -302,17 +303,11 @@ struct MpvProto { std::vector triggered_puffettes; }; -struct OutfixInfo { /* TODO: poly */ - OutfixInfo() {} - explicit OutfixInfo(std::unique_ptr r) : rdfa(std::move(r)) { - assert(rdfa); - } - explicit OutfixInfo(std::unique_ptr h) : holder(std::move(h)) { - assert(holder); - } - explicit OutfixInfo(std::unique_ptr r) : haig(std::move(r)) { - assert(haig); - } +struct OutfixInfo { + template + explicit OutfixInfo(std::unique_ptr x) : proto(std::move(x)) {} + + explicit OutfixInfo(MpvProto mpv) : proto(std::move(mpv)) {} u32 get_queue(QueueIndexFactory &qif); @@ -322,25 +317,68 @@ struct OutfixInfo { /* TODO: poly */ } bool is_nonempty_mpv() const { - return !mpv.empty(); + auto *mpv = boost::get(&proto); + return mpv && !mpv->empty(); } bool is_dead() const { - return !holder && !rdfa && !haig && mpv.empty(); + auto *mpv = boost::get(&proto); + if (mpv) { + return mpv->empty(); + } + return boost::get(&proto) != nullptr; } void clear() { - holder.reset(); - rdfa.reset(); - haig.reset(); - mpv.reset(); - assert(is_dead()); + proto = boost::blank(); + } + + // Convenience accessor functions. + + NGHolder *holder() { + auto *up = boost::get>(&proto); + return up ? up->get() : nullptr; + } + raw_dfa *rdfa() { + auto *up = boost::get>(&proto); + return up ? up->get() : nullptr; + } + raw_som_dfa *haig() { + auto *up = boost::get>(&proto); + return up ? up->get() : nullptr; + } + MpvProto *mpv() { + return boost::get(&proto); + } + + // Convenience const accessor functions. + + const NGHolder *holder() const { + auto *up = boost::get>(&proto); + return up ? up->get() : nullptr; + } + const raw_dfa *rdfa() const { + auto *up = boost::get>(&proto); + return up ? up->get() : nullptr; + } + const raw_som_dfa *haig() const { + auto *up = boost::get>(&proto); + return up ? up->get() : nullptr; + } + const MpvProto *mpv() const { + return boost::get(&proto); } - std::unique_ptr holder; - std::unique_ptr rdfa; - std::unique_ptr haig; - MpvProto mpv; + /** + * \brief Variant wrapping the various engine types. If this is + * boost::blank, it means that this outfix is unused (dead). 
+ */ + boost::variant< + boost::blank, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + MpvProto> proto = boost::blank(); RevAccInfo rev_info; u32 maxBAWidth = 0; //!< max bi-anchored width diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index fd15e3963..664aaef78 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -2451,8 +2451,9 @@ void mergeOutfixNfas(RoseBuildImpl &tbi, vector &nfas) { map nfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { - if (outfixes[i].holder) { - nfa_mapping[outfixes[i].holder.get()] = i; + auto *holder = outfixes[i].holder(); + if (holder) { + nfa_mapping[holder] = i; } } @@ -2485,7 +2486,7 @@ struct MergeMcClellan { } static void transfer(OutfixInfo &outfix, unique_ptr d) { - outfix.rdfa = move(d); + outfix.proto = move(d); } private: @@ -2503,7 +2504,7 @@ struct MergeHaig { } static void transfer(OutfixInfo &outfix, unique_ptr d) { - outfix.haig = move(d); + outfix.proto = move(d); } private: @@ -2602,8 +2603,9 @@ void mergeOutfixDfas(RoseBuildImpl &tbi, vector &dfas) { * element addition. */ ue2::unordered_map dfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { - if (outfixes[i].rdfa) { - dfa_mapping[outfixes[i].rdfa.get()] = i; + auto *rdfa = outfixes[i].rdfa(); + if (rdfa) { + dfa_mapping[rdfa] = i; } } @@ -2624,10 +2626,10 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, bool seen_dfa = false; u32 nfa_count = 0; for (const auto &outfix : tbi.outfixes) { - if (outfix.holder) { + if (outfix.holder()) { DEBUG_PRINTF("nfa\n"); nfa_count++; - } else if (outfix.rdfa) { + } else if (outfix.rdfa()) { DEBUG_PRINTF("dfa\n"); seen_dfa = true; } @@ -2647,27 +2649,29 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, vector dfas; for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { - assert(!it->is_dead()); - assert(!it->chained); - if (it->rdfa) { - dfas.push_back(it->rdfa.get()); - dfa_mapping[it->rdfa.get()] = it - tbi.outfixes.begin(); + auto &outfix = *it; + assert(!outfix.is_dead()); + assert(!outfix.chained); + + if (outfix.rdfa()) { + auto *rdfa = outfix.rdfa(); + dfas.push_back(rdfa); + dfa_mapping[rdfa] = it - tbi.outfixes.begin(); continue; } - if (!it->holder) { + if (!outfix.holder()) { continue; } - NGHolder *h = it->holder.get(); + NGHolder *h = outfix.holder(); assert(h->kind == NFA_OUTFIX); auto rdfa = buildMcClellan(*h, &rm, grey); if (rdfa) { // Transform this outfix into a DFA and add it to the merge set. 
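Two properties of the variant make the merge rewrites above and below work: boost::get called with a pointer yields nullptr when a different alternative is active (so the holder()/rdfa()/haig() accessors double as type tests), and assigning a unique_ptr into the variant replaces whatever engine was held before, which is how the assignment just below (outfix.proto = move(rdfa)) performs the NFA-to-DFA transform in place. A toy illustration with assumed stand-in types (raw_dfa_like, holder_like are inventions); it presumes a Boost release whose variant accepts move-only alternatives, as the patch itself does:

    #include <boost/blank.hpp>
    #include <boost/variant.hpp>
    #include <cassert>
    #include <memory>
    #include <utility>

    struct raw_dfa_like { int states = 0; };   // stand-in for raw_dfa
    struct holder_like { int verts = 0; };     // stand-in for NGHolder

    using Proto = boost::variant<boost::blank,
                                 std::unique_ptr<raw_dfa_like>,
                                 std::unique_ptr<holder_like>>;

    static raw_dfa_like *get_rdfa(Proto &p) {
        auto *up = boost::get<std::unique_ptr<raw_dfa_like>>(&p);
        return up ? up->get() : nullptr;
    }

    int main() {
        Proto proto = boost::blank();             // "dead" outfix
        assert(!get_rdfa(proto));

        proto = std::make_unique<holder_like>();
        assert(!get_rdfa(proto));                 // wrong alternative

        proto = std::make_unique<raw_dfa_like>(); // DFA transform in place
        assert(get_rdfa(proto));
        return 0;
    }

Using boost::blank as the first alternative also gives the variant a cheap, unambiguous "unused" state, which is what the new is_dead()/clear() definitions test for.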
dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin(); dfas.push_back(rdfa.get()); - it->clear(); - it->rdfa = move(rdfa); + outfix.proto = move(rdfa); new_dfas++; } } @@ -2695,8 +2699,9 @@ void mergeOutfixHaigs(RoseBuildImpl &tbi, vector &dfas, ue2::unordered_map dfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { - if (outfixes[i].haig) { - dfa_mapping[outfixes[i].haig.get()] = i; + auto *haig = outfixes[i].haig(); + if (haig) { + dfa_mapping[haig] = i; } } @@ -2721,14 +2726,14 @@ void mergeOutfixes(RoseBuildImpl &tbi) { vector dfas; vector som_dfas; - for (const auto &outfix : tbi.outfixes) { + for (auto &outfix : tbi.outfixes) { assert(!outfix.chained); - if (outfix.rdfa) { - dfas.push_back(outfix.rdfa.get()); - } else if (outfix.holder) { - nfas.push_back(outfix.holder.get()); - } else if (outfix.haig) { - som_dfas.push_back(outfix.haig.get()); + if (outfix.rdfa()) { + dfas.push_back(outfix.rdfa()); + } else if (outfix.holder()) { + nfas.push_back(outfix.holder()); + } else if (outfix.haig()) { + som_dfas.push_back(outfix.haig()); } } diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 2904e4acc..b16e3a69e 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -631,10 +631,11 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) } if (tbi.mpv_outfix) { - for (const auto &puff : tbi.mpv_outfix->mpv.puffettes) { + auto *mpv = tbi.mpv_outfix->mpv(); + for (const auto &puff : mpv->puffettes) { puff_map[puff.report].insert(&puff); } - for (const auto &puff : tbi.mpv_outfix->mpv.triggered_puffettes) { + for (const auto &puff : mpv->triggered_puffettes) { puff_map[puff.report].insert(&puff); } } @@ -785,12 +786,13 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport( } has_outfix = true; - if (out.haig) { + if (out.haig()) { return true; /* haig may report matches with different SOM at the same offset */ } - if (out.holder && requiresDedupe(*out.holder, reports, tbi.cc.grey)) { + if (out.holder() && + requiresDedupe(*out.holder(), reports, tbi.cc.grey)) { return true; } } @@ -874,25 +876,33 @@ u32 OutfixInfo::get_queue(QueueIndexFactory &qif) { return queue; } -set all_reports(const OutfixInfo &outfix) { - set reports; - if (outfix.holder) { - insert(&reports, all_reports(*outfix.holder)); - } - if (outfix.rdfa) { - insert(&reports, all_reports(*outfix.rdfa)); - } - if (outfix.haig) { - insert(&reports, all_reports(*outfix.haig)); +namespace { +class OutfixAllReports : public boost::static_visitor> { +public: + set operator()(const boost::blank &) const { + return {}; } - for (const auto &puff : outfix.mpv.puffettes) { - reports.insert(puff.report); + template + set operator()(const unique_ptr &x) const { + return all_reports(*x); } - for (const auto &puff : outfix.mpv.triggered_puffettes) { - reports.insert(puff.report); + + set operator()(const MpvProto &mpv) const { + set reports; + for (const auto &puff : mpv.puffettes) { + reports.insert(puff.report); + } + for (const auto &puff : mpv.triggered_puffettes) { + reports.insert(puff.report); + } + return reports; } +}; +} +set all_reports(const OutfixInfo &outfix) { + auto reports = boost::apply_visitor(OutfixAllReports(), outfix.proto); assert(!reports.empty()); return reports; } From 16985ab6e5c72cdddd130351dd86e7e6767fec77 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 18 Apr 2016 09:49:13 +1000 Subject: [PATCH 157/218] Use a visitor for outfix engine construction --- src/rose/rose_build_bytecode.cpp | 68 +++++++++++++++++++++----------- 1 file changed, 45 
insertions(+), 23 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 6c3b1844a..fe0903807 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1256,36 +1256,42 @@ bool hasNonSmallBlockOutfix(const vector &outfixes) { return false; } -static -aligned_unique_ptr buildOutfix(RoseBuildImpl &tbi, OutfixInfo &outfix) { - assert(!outfix.is_dead()); // should not be marked dead. +namespace { +class OutfixBuilder : public boost::static_visitor> { +public: + explicit OutfixBuilder(const RoseBuildImpl &build_in) : build(build_in) {} - const CompileContext &cc = tbi.cc; - const ReportManager &rm = tbi.rm; + aligned_unique_ptr operator()(boost::blank&) const { + return nullptr; + }; - aligned_unique_ptr n; - if (auto *rdfa = outfix.rdfa()) { + aligned_unique_ptr operator()(unique_ptr &rdfa) const { // Unleash the McClellan! - n = mcclellanCompile(*rdfa, cc); - } else if (auto *haig = outfix.haig()) { + return mcclellanCompile(*rdfa, build.cc); + } + + aligned_unique_ptr operator()(unique_ptr &haig) const { // Unleash the Goughfish! - n = goughCompile(*haig, tbi.ssm.somPrecision(), cc); - } else if (auto *holder = outfix.holder()) { + return goughCompile(*haig, build.ssm.somPrecision(), build.cc); + } + + aligned_unique_ptr operator()(unique_ptr &holder) const { + const CompileContext &cc = build.cc; + const ReportManager &rm = build.rm; + NGHolder &h = *holder; assert(h.kind == NFA_OUTFIX); // Build NFA. - if (!n) { - const map fixed_depth_tops; /* no tops */ - const map>> triggers; /* no tops */ - bool compress_state = cc.streaming; - n = constructNFA(h, &rm, fixed_depth_tops, triggers, compress_state, - cc); - } + const map fixed_depth_tops; /* no tops */ + const map>> triggers; /* no tops */ + bool compress_state = cc.streaming; + auto n = constructNFA(h, &rm, fixed_depth_tops, triggers, + compress_state, cc); // Try for a DFA upgrade. - if (n && cc.grey.roseMcClellanOutfix - && !has_bounded_repeats_other_than_firsts(*n)) { + if (n && cc.grey.roseMcClellanOutfix && + !has_bounded_repeats_other_than_firsts(*n)) { auto rdfa = buildMcClellan(h, &rm, cc.grey); if (rdfa) { auto d = mcclellanCompile(*rdfa, cc); @@ -1294,11 +1300,27 @@ aligned_unique_ptr buildOutfix(RoseBuildImpl &tbi, OutfixInfo &outfix) { } } } - } else if (auto *mpv = outfix.mpv()) { - assert(mpv->puffettes.empty()); + + return n; + } + + aligned_unique_ptr operator()(UNUSED MpvProto &mpv) const { + // MPV construction handled separately. + assert(mpv.puffettes.empty()); + return nullptr; } - if (n && tbi.cc.grey.reverseAccelerate) { +private: + const RoseBuildImpl &build; +}; +} + +static +aligned_unique_ptr buildOutfix(RoseBuildImpl &build, OutfixInfo &outfix) { + assert(!outfix.is_dead()); // should not be marked dead. 
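The boost::apply_visitor call that follows is the dispatch point: exactly one operator() overload of OutfixBuilder runs, selected by whichever alternative proto currently holds, and the blank overload returning nullptr is what replaces the old if/else accessor chain. A free-standing sketch of the mechanism, with toy types (Engine and SizeVisitor are inventions for illustration, subject to the same move-only-alternative Boost caveat as above):

    #include <boost/blank.hpp>
    #include <boost/variant.hpp>
    #include <iostream>
    #include <memory>

    using Engine = boost::variant<boost::blank, std::unique_ptr<int>>;

    struct SizeVisitor : boost::static_visitor<size_t> {
        size_t operator()(const boost::blank &) const { return 0; }
        size_t operator()(const std::unique_ptr<int> &) const {
            return sizeof(int);
        }
    };

    int main() {
        Engine e = std::make_unique<int>(7);
        // Exactly one operator() runs, chosen by the active alternative;
        // adding a new alternative without an overload fails to compile.
        std::cout << boost::apply_visitor(SizeVisitor(), e) << "\n";
        return 0;
    }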
+ + auto n = boost::apply_visitor(OutfixBuilder(build), outfix.proto); + if (n && build.cc.grey.reverseAccelerate) { buildReverseAcceleration(n.get(), outfix.rev_info, outfix.minWidth); } From 319d47ae4f1350af275bbbfcc14b9b66f1514547 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 18 Apr 2016 13:12:15 +1000 Subject: [PATCH 158/218] Remove OutfixInfo::chained (which meant "is MPV") --- src/rose/rose_build_add.cpp | 2 -- src/rose/rose_build_bytecode.cpp | 5 ++--- src/rose/rose_build_impl.h | 1 - src/rose/rose_build_merge.cpp | 5 ----- 4 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index f2ef2ca9c..a0ccb7ae1 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1777,7 +1777,6 @@ bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) { bool RoseBuildImpl::addOutfix(const raw_puff &rp) { if (!mpv_outfix) { mpv_outfix = make_unique(MpvProto()); - mpv_outfix->chained = true; } auto *mpv = mpv_outfix->mpv(); @@ -1803,7 +1802,6 @@ bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) { if (!mpv_outfix) { mpv_outfix = make_unique(MpvProto()); - mpv_outfix->chained = true; } auto *mpv = mpv_outfix->mpv(); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index fe0903807..f79bce5bc 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1340,7 +1340,7 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, assert(!mpv_outfix); mpv_outfix = &out; } else { - assert(!out.chained); + assert(!out.mpv()); } } @@ -1348,7 +1348,6 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, return; } - assert(mpv_outfix->chained); auto *mpv = mpv_outfix->mpv(); auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes); assert(nfa); @@ -1408,7 +1407,7 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, assert(tbi.qif.allocated_count() == bc.engineOffsets.size()); for (auto &out : tbi.outfixes) { - if (out.chained) { + if (out.mpv()) { continue; /* already done */ } DEBUG_PRINTF("building outfix %zd\n", &out - &tbi.outfixes[0]); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 4b9f6f103..4122e0bda 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -385,7 +385,6 @@ struct OutfixInfo { depth minWidth = depth::infinity(); depth maxWidth = 0; u64a maxOffset = 0; - bool chained = false; bool in_sbmatcher = false; //!< handled by small-block matcher. private: diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 664aaef78..7a329e9a3 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -2417,9 +2417,6 @@ void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) { // layer at runtime will protect us from extra matches if only one was in // the small block matcher. winner.in_sbmatcher &= victim.in_sbmatcher; - - // We should never have merged outfixes that differ in these properties. 
- assert(winner.chained == victim.chained); } static @@ -2651,7 +2648,6 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { auto &outfix = *it; assert(!outfix.is_dead()); - assert(!outfix.chained); if (outfix.rdfa()) { auto *rdfa = outfix.rdfa(); @@ -2727,7 +2723,6 @@ void mergeOutfixes(RoseBuildImpl &tbi) { vector som_dfas; for (auto &outfix : tbi.outfixes) { - assert(!outfix.chained); if (outfix.rdfa()) { dfas.push_back(outfix.rdfa()); } else if (outfix.holder()) { From fd7f30a5b55aa89111ebb47f8dc8af5e91693b1b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 18 Apr 2016 13:21:34 +1000 Subject: [PATCH 159/218] Merge functions no longer need transfer function --- src/rose/rose_build_merge.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 7a329e9a3..42dd4d2fd 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -2482,10 +2482,6 @@ struct MergeMcClellan { return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey); } - static void transfer(OutfixInfo &outfix, unique_ptr d) { - outfix.proto = move(d); - } - private: const ReportManager &rm; const Grey &grey; @@ -2500,10 +2496,6 @@ struct MergeHaig { return attemptToMergeHaig({d1, d2}, limit); } - static void transfer(OutfixInfo &outfix, unique_ptr d) { - outfix.proto = move(d); - } - private: const u32 limit; //!< state limit for merged result. }; @@ -2550,7 +2542,7 @@ void pairwiseDfaMerge(vector &dfas, RawDfa *dfa_ptr = rdfa.get(); dfa_mapping[dfa_ptr] = dfa_mapping[*it]; dfa_mapping.erase(*it); - merge_func.transfer(winner, move(rdfa)); + winner.proto = move(rdfa); mergeOutfixInfo(winner, victim); From f40aa6bd400d57b7d4c1b518515d3e083bd0c331 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 18 Apr 2016 14:20:45 +1000 Subject: [PATCH 160/218] report_manager: store report -> program mapping --- src/rose/rose_build_bytecode.cpp | 1 + src/util/report_manager.cpp | 14 +++++++++++++- src/util/report_manager.h | 13 ++++++++++++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index f79bce5bc..ea3a80a3d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3771,6 +3771,7 @@ u32 buildReportPrograms(RoseBuildImpl &build, build_context &bc) { program = flattenProgram({program}); applyFinalSpecialisation(program); programs[id] = writeProgram(bc, program); + build.rm.setProgramOffset(id, programs[id]); DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id, programs.back(), program.size()); } diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp index 425f166d1..8377ea036 100644 --- a/src/util/report_manager.cpp +++ b/src/util/report_manager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -217,6 +217,18 @@ Report ReportManager::getBasicInternalReport(const NGWrapper &g, s32 adj) { return makeECallback(g.reportId, adj, ekey); } +void ReportManager::setProgramOffset(ReportID id, u32 programOffset) { + assert(id < reportIds.size()); + assert(!contains(reportIdToProgramOffset, id)); + reportIdToProgramOffset.emplace(id, programOffset); +} + +u32 
ReportManager::getProgramOffset(ReportID id) const { + assert(id < reportIds.size()); + assert(contains(reportIdToProgramOffset, id)); + return reportIdToProgramOffset.at(id); +} + static void ekeysUnion(std::set *ekeys, u32 more) { if (!ekeys->empty()) { diff --git a/src/util/report_manager.h b/src/util/report_manager.h index 4fb68aacb..0eed2711b 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -113,6 +113,14 @@ class ReportManager : boost::noncopyable { * ~0U if no dkey is needed. */ u32 getDkey(const Report &r) const; + /** \brief Register a Rose program offset with the given report. */ + void setProgramOffset(ReportID id, u32 programOffset); + + /** \brief Fetch the program offset for a given report. It is a fatal error + * for this to be called with a report for which no program offset has been + * set. */ + u32 getProgramOffset(ReportID id) const; + private: /** \brief Grey box ref, for checking resource limits. */ const Grey &grey; @@ -127,6 +135,9 @@ class ReportManager : boost::noncopyable { /** \brief Mapping from ReportID to dedupe key. */ std::map reportIdToDedupeKey; + /** \brief Mapping from ReportID to Rose program offset in bytecode. */ + std::map reportIdToProgramOffset; + /** \brief Mapping from external match ids to information about that * id. */ std::map externalIdMap; From 611579511ca86df49161f1ad5fed65ad5c4c75ff Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 18 Apr 2016 14:40:58 +1000 Subject: [PATCH 161/218] rose: remap reports to program offsets --- CMakeLists.txt | 2 + src/nfa/castlecompile.cpp | 26 +++++++++++ src/nfa/castlecompile.h | 5 ++- src/nfa/goughcompile_util.cpp | 68 +++++++++++++++++++++++++++++ src/nfa/goughcompile_util.h | 41 +++++++++++++++++ src/nfa/mcclellancompile_util.cpp | 23 ++++++++++ src/nfa/mcclellancompile_util.h | 3 ++ src/nfagraph/ng_limex.cpp | 29 +++++++++++- src/rose/catchup.c | 9 ++-- src/rose/match.c | 7 ++- src/rose/rose_build_bytecode.cpp | 45 +++++++++++++++---- src/smallwrite/smallwrite_build.cpp | 2 + unit/internal/limex_nfa.cpp | 18 +++++--- 13 files changed, 252 insertions(+), 26 deletions(-) create mode 100644 src/nfa/goughcompile_util.cpp create mode 100644 src/nfa/goughcompile_util.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a4973c026..0a416405b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -593,6 +593,8 @@ SET (hs_SRCS src/nfa/goughcompile_accel.cpp src/nfa/goughcompile_internal.h src/nfa/goughcompile_reg.cpp + src/nfa/goughcompile_util.cpp + src/nfa/goughcompile_util.h src/nfa/mcclellan.h src/nfa/mcclellan_internal.h src/nfa/mcclellancompile.cpp diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index d7312b855..3d99690a3 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -50,6 +50,7 @@ #include "util/make_unique.h" #include "util/multibit_build.h" #include "util/multibit_internal.h" +#include "util/report_manager.h" #include "util/ue2_containers.h" #include "util/verify_types.h" #include "grey.h" @@ -990,4 +991,29 @@ unique_ptr makeHolder(const CastleProto &proto, nfa_kind kind, return g; } +static +void remapReportsToPrograms(PureRepeat &pr, const ReportManager &rm) { + if (pr.reports.empty()) { + return; + } + auto old_reports = pr.reports; + pr.reports.clear(); + for (const auto 
&r : old_reports) { + pr.reports.insert(rm.getProgramOffset(r)); + } +} + +void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm) { + for (auto &m : castle.repeats) { + remapReportsToPrograms(m.second, rm); + } + + auto old_report_map = castle.report_map; + castle.report_map.clear(); + for (auto &m : old_report_map) { + u32 program = rm.getProgramOffset(m.first); + castle.report_map[program].insert(begin(m.second), end(m.second)); + } +} + } // namespace ue2 diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index fc4bb9913..73c753268 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,6 +51,7 @@ namespace ue2 { class CharReach; class NGHolder; +class ReportManager; struct CompileContext; /** @@ -158,6 +159,8 @@ bool requiresDedupe(const CastleProto &proto, std::unique_ptr makeHolder(const CastleProto &castle, nfa_kind kind, const CompileContext &cc); +void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm); + } // namespace ue2 #endif // NFA_CASTLECOMPILE_H diff --git a/src/nfa/goughcompile_util.cpp b/src/nfa/goughcompile_util.cpp new file mode 100644 index 000000000..33030131c --- /dev/null +++ b/src/nfa/goughcompile_util.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "goughcompile.h" +#include "goughcompile_util.h" +#include "mcclellancompile_util.h" +#include "util/report_manager.h" + +#include "ue2common.h" + +using namespace std; +using namespace ue2; + +namespace ue2 { + +static +void remapReportsToPrograms(set &reports, + const ReportManager &rm) { + if (reports.empty()) { + return; + } + auto old_reports = reports; + reports.clear(); + for (const auto &r : old_reports) { + u32 program = rm.getProgramOffset(r.report); + reports.emplace(program, r.slot); + } +} + +void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm) { + DEBUG_PRINTF("remap haig reports\n"); + + for (auto &ds : haig.state_som) { + remapReportsToPrograms(ds.reports, rm); + remapReportsToPrograms(ds.reports_eod, rm); + } + + // McClellan-style reports too. + raw_dfa &rdfa = haig; + remapReportsToPrograms(rdfa, rm); +} + +} // namespace ue2 diff --git a/src/nfa/goughcompile_util.h b/src/nfa/goughcompile_util.h new file mode 100644 index 000000000..05c9d90d0 --- /dev/null +++ b/src/nfa/goughcompile_util.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef GOUGHCOMPILE_UTIL_H +#define GOUGHCOMPILE_UTIL_H + +namespace ue2 { + +struct raw_som_dfa; +class ReportManager; + +void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm); + +} // namespace ue2 + +#endif // GOUGHCOMPILE_UTIL_H diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index 234574d84..d05a67760 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -395,4 +395,27 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { } } +static +void remapReportsToPrograms(flat_set &reports, + const ReportManager &rm) { + if (reports.empty()) { + return; + } + auto old_reports = reports; + reports.clear(); + for (const ReportID &id : old_reports) { + u32 program = rm.getProgramOffset(id); + reports.insert(program); + } +} + +void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm) { + DEBUG_PRINTF("remap dfa reports\n"); + for (auto &ds : rdfa.states) { + remapReportsToPrograms(ds.reports, rm); + remapReportsToPrograms(ds.reports_eod, rm); + } +} + + } // namespace ue2 diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index 7b6c033a9..e8bfd4a19 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -31,6 +31,7 @@ #include "rdfa.h" #include "ue2common.h" +#include "util/report_manager.h" #include @@ -57,6 +58,8 @@ size_t hash_dfa(const raw_dfa &rdfa); dstate_id_t get_sds_or_proxy(const raw_dfa &raw); +void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 17f93bb21..713fe370b 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -50,8 +50,9 @@ #include "util/compile_context.h" #include "util/container.h" #include "util/graph_range.h" -#include "util/verify_types.h" +#include "util/report_manager.h" #include "util/ue2_containers.h" +#include "util/verify_types.h" #include #include @@ -346,6 +347,25 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, return h; } +static +void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { + for (const auto &v : vertices_range(h)) { + auto &reports = h[v].reports; + if (reports.empty()) { + continue; + } + auto old_reports = reports; + reports.clear(); + for (const ReportID &id : old_reports) { + u32 program = rm.getProgramOffset(id); + reports.insert(program); + } + DEBUG_PRINTF("vertex %u: remapped reports {%s} to programs {%s}\n", + h[v].index, as_string_list(old_reports).c_str(), + as_string_list(reports).c_str()); + } +} + static aligned_unique_ptr constructNFA(const NGHolder &h_in, const ReportManager *rm, @@ -393,6 +413,11 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, set zombies = findZombies(*h, br_cyclic, state_ids, cc); + if (generates_callbacks(*h)) { + assert(rm); + remapReportsToPrograms(*h, *rm); + } + if (!cc.streaming || !cc.grey.compressNFAState) { compress_state = false; } diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 7c44bf9ff..d6e7860c5 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -49,14 +49,15 @@ typedef struct queue_match PQ_T; static really_inline int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch 
*scratch, u64a som, u64a offset, ReportID id, const char from_mpv) { - assert(id < rose->reportProgramCount); - const u32 *programs = getByOffset(rose, rose->reportProgramOffset); + const u32 program = id; + assert(program > 0); + assert(program % ROSE_INSTR_MIN_ALIGN == 0); const size_t match_len = 0; // Unused in this path. const char in_anchored = 0; const char in_catchup = 1; - roseRunProgram(rose, scratch, programs[id], som, offset, match_len, - in_anchored, in_catchup, from_mpv, 0); + roseRunProgram(rose, scratch, program, som, offset, match_len, in_anchored, + in_catchup, from_mpv, 0); return can_stop_matching(scratch) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING; } diff --git a/src/rose/match.c b/src/rose/match.c index ef75b1134..4e9e72a6c 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -644,16 +644,15 @@ int roseReportAdaptor_i(u64a som, u64a offset, ReportID id, void *context) { const struct RoseEngine *rose = scratch->core_info.rose; - assert(id < rose->reportProgramCount); - const u32 *programs = getByOffset(rose, rose->reportProgramOffset); - + // Our match ID is the program offset. + const u32 program = id; const size_t match_len = 0; // Unused in this path. const char in_anchored = 0; const char in_catchup = 0; const char from_mpv = 0; const char skip_mpv_catchup = 1; hwlmcb_rv_t rv = - roseRunProgram(rose, scratch, programs[id], som, offset, match_len, + roseRunProgram(rose, scratch, program, som, offset, match_len, in_anchored, in_catchup, from_mpv, skip_mpv_catchup); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index ea3a80a3d..0454c83b9 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -43,7 +43,9 @@ #include "hwlm/hwlm.h" /* engine types */ #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" +#include "nfa/goughcompile_util.h" #include "nfa/mcclellancompile.h" +#include "nfa/mcclellancompile_util.h" #include "nfa/nfa_api_queue.h" #include "nfa/nfa_build_util.h" #include "nfa/nfa_internal.h" @@ -885,19 +887,25 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, const map>> &triggers, suffix_id suff, const CompileContext &cc) { if (suff.castle()) { - auto n = buildRepeatEngine(*suff.castle(), triggers, cc); + auto remapped_castle = *suff.castle(); + remapReportsToPrograms(remapped_castle, rm); + auto n = buildRepeatEngine(remapped_castle, triggers, cc); assert(n); return n; } if (suff.haig()) { - auto n = goughCompile(*suff.haig(), ssm.somPrecision(), cc); + auto remapped_haig = *suff.haig(); + remapReportsToPrograms(remapped_haig, rm); + auto n = goughCompile(remapped_haig, ssm.somPrecision(), cc); assert(n); return n; } if (suff.dfa()) { - auto d = mcclellanCompile(*suff.dfa(), cc); + auto remapped_rdfa = *suff.dfa(); + remapReportsToPrograms(remapped_rdfa, rm); + auto d = mcclellanCompile(remapped_rdfa, cc); assert(d); return d; } @@ -910,7 +918,9 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, // Take a shot at the LBR engine. 
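The buildSuffix() hunks around this point, like the outfix and MPV ones later in the patch, repeat a single idiom: copy the compile-time structure, rewrite every ReportID through ReportManager::getProgramOffset(), and only then hand it to the engine compiler, so the report values baked into the bytecode are the program offsets the runtime jumps to directly (the const u32 program = id change in catchup.c above is the consuming side). A stripped-down sketch of the registry plus the set-rewrite step; OffsetRegistry and remap() are simplified stand-ins for ReportManager and remapReportsToPrograms(), not the real signatures:

    #include <cassert>
    #include <map>
    #include <set>

    using ReportID = unsigned;

    struct OffsetRegistry { // stands in for ReportManager's new mapping
        std::map<ReportID, unsigned> to_program;
        void setProgramOffset(ReportID id, unsigned off) {
            assert(!to_program.count(id)); // each report registered once
            to_program.emplace(id, off);
        }
        unsigned getProgramOffset(ReportID id) const {
            auto it = to_program.find(id);
            assert(it != to_program.end()); // fatal if never registered
            return it->second;
        }
    };

    // Same shape as remapReportsToPrograms(): rebuild the set in place.
    static void remap(std::set<ReportID> &reports, const OffsetRegistry &rm) {
        auto old_reports = reports;
        reports.clear();
        for (ReportID r : old_reports) {
            reports.insert(rm.getProgramOffset(r));
        }
    }

This is also why buildReportPrograms() now has to run before the NFAs are built: the offsets must exist in the registry before any engine's reports are remapped.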
if (oneTop) { - auto lbr = constructLBR(holder, triggers.at(0), cc); + auto remapped_holder = cloneHolder(holder); + remapReportsToPrograms(*remapped_holder, rm); + auto lbr = constructLBR(*remapped_holder, triggers.at(0), cc); if (lbr) { return lbr; } @@ -926,6 +936,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { + remapReportsToPrograms(*rdfa, rm); auto d = mcclellanCompile(*rdfa, cc); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { @@ -1267,12 +1278,16 @@ class OutfixBuilder : public boost::static_visitor> { aligned_unique_ptr operator()(unique_ptr &rdfa) const { // Unleash the McClellan! - return mcclellanCompile(*rdfa, build.cc); + raw_dfa tmp(*rdfa); + remapReportsToPrograms(tmp, build.rm); + return mcclellanCompile(tmp, build.cc); } aligned_unique_ptr operator()(unique_ptr &haig) const { // Unleash the Goughfish! - return goughCompile(*haig, build.ssm.somPrecision(), build.cc); + raw_som_dfa tmp(*haig); + remapReportsToPrograms(tmp, build.rm); + return goughCompile(tmp, build.ssm.somPrecision(), build.cc); } aligned_unique_ptr operator()(unique_ptr &holder) const { @@ -1327,6 +1342,16 @@ aligned_unique_ptr buildOutfix(RoseBuildImpl &build, OutfixInfo &outfix) { return n; } +static +void remapReportsToPrograms(MpvProto &mpv, const ReportManager &rm) { + for (auto &puff : mpv.puffettes) { + puff.report = rm.getProgramOffset(puff.report); + } + for (auto &puff : mpv.triggered_puffettes) { + puff.report = rm.getProgramOffset(puff.report); + } +} + static void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, bool *mpv_as_outfix) { @@ -1349,7 +1374,9 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, } auto *mpv = mpv_outfix->mpv(); - auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes); + auto tmp = *mpv; // copy + remapReportsToPrograms(tmp, tbi.rm); + auto nfa = mpvCompile(tmp.puffettes, tmp.triggered_puffettes); assert(nfa); if (!nfa) { throw CompileError("Unable to generate bytecode."); @@ -4000,6 +4027,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary); + u32 reportProgramOffset = buildReportPrograms(*this, bc); + // Build NFAs set no_retrigger_queues; bool mpv_as_outfix; @@ -4045,8 +4074,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 eodIterOffset; tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc); - u32 reportProgramOffset = buildReportPrograms(*this, bc); - vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index dfefe5e8d..a3fe43d41 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -376,6 +376,8 @@ SmallWriteBuildImpl::build(u32 roseQuality) { DEBUG_PRINTF("building rdfa %p\n", rdfa.get()); + remapReportsToPrograms(*rdfa, rm); + u32 start_offset; u32 small_region; auto nfa = prepEngine(*rdfa, roseQuality, cc, &start_offset, &small_region); diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 6d77fffe5..91ab09dbd 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -46,7 +46,9 @@ using namespace std; using namespace testing; using namespace ue2; -static const string SCAN_DATA = "___foo______\n___foofoo_foo_^^^^^^^^^^^^^^^^^^^^^^__bar_bar______0_______z_____bar"; 
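On the unit-test hunk just below: the tests now register a fixed stand-in program offset (MATCH_REPORT = 1024) rather than reusing report id 0, since remapped engines report program offsets. A sensible stand-in should at least have the shape the runtime asserts of program offsets in the catchup.c hunk above, nonzero and ROSE_INSTR_MIN_ALIGN-aligned; a compile-time sanity check along these lines would do, where 8 is an assumed stand-in for the real ROSE_INSTR_MIN_ALIGN value (defined alongside the ROSE_STRUCT_* instruction layouts):

    // Sketch: the dummy test offset would pass the runtime asserts.
    static const unsigned ASSUMED_MIN_ALIGN = 8;
    static const unsigned MATCH_REPORT_VALUE = 1024;
    static_assert(MATCH_REPORT_VALUE > 0 &&
                      MATCH_REPORT_VALUE % ASSUMED_MIN_ALIGN == 0,
                  "stand-in report must look like a valid program offset");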
+static const string SCAN_DATA = "___foo______\n___foofoo_foo_^^^^^^^^^^^^^^^^^^" + "^^^^__bar_bar______0_______z_____bar"; +static const u32 MATCH_REPORT = 1024; static int onMatch(u64a, ReportID, void *ctx) { @@ -75,6 +77,8 @@ class LimExModelTest : public TestWithParam { unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + rm.setProgramOffset(0, MATCH_REPORT); + const map fixed_depth_tops; const map>> triggers; bool compress_state = false; @@ -223,7 +227,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) { char rv = nfaQueueExecToMatch(nfa.get(), &q, end); ASSERT_EQ(MO_MATCHES_PENDING, rv); ASSERT_EQ(0, matches); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); nfaReportCurrentMatches(nfa.get(), &q); ASSERT_EQ(1, matches); @@ -232,7 +236,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) { rv = nfaQueueExecToMatch(nfa.get(), &q, end); ASSERT_EQ(MO_MATCHES_PENDING, rv); ASSERT_EQ(1, matches); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); nfaReportCurrentMatches(nfa.get(), &q); ASSERT_EQ(2, matches); @@ -241,7 +245,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) { rv = nfaQueueExecToMatch(nfa.get(), &q, end); ASSERT_EQ(MO_MATCHES_PENDING, rv); ASSERT_EQ(2, matches); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); nfaReportCurrentMatches(nfa.get(), &q); ASSERT_EQ(3, matches); @@ -267,10 +271,10 @@ TEST_P(LimExModelTest, QueueExecRose) { pushQueue(&q, MQE_TOP, 0); pushQueue(&q, MQE_END, end); - char rv = nfaQueueExecRose(nfa.get(), &q, 0 /* report id */); + char rv = nfaQueueExecRose(nfa.get(), &q, MATCH_REPORT); ASSERT_EQ(MO_MATCHES_PENDING, rv); pushQueue(&q, MQE_START, end); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); } TEST_P(LimExModelTest, CheckFinalState) { @@ -367,6 +371,8 @@ class LimExZombieTest : public TestWithParam { unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + rm.setProgramOffset(0, MATCH_REPORT); + const map fixed_depth_tops; const map>> triggers; bool compress_state = false; From 1f41a921f2ca72103a24a041c9b21a7c064abd99 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 19 Apr 2016 11:53:10 +1000 Subject: [PATCH 162/218] mcclellan, gough: native report remapping --- CMakeLists.txt | 2 - src/nfa/goughcompile.cpp | 34 +++++++++------ src/nfa/goughcompile.h | 3 +- src/nfa/goughcompile_util.cpp | 68 ----------------------------- src/nfa/goughcompile_util.h | 41 ----------------- src/nfa/mcclellancompile.cpp | 28 ++++++++---- src/nfa/mcclellancompile.h | 8 +++- src/nfa/mcclellancompile_util.cpp | 23 ---------- src/nfa/mcclellancompile_util.h | 3 -- src/nfa/nfa_kind.h | 37 +++++++++++++--- src/nfagraph/ng_split.cpp | 2 + src/rose/rose_build_add.cpp | 1 + src/rose/rose_build_anchored.cpp | 11 ++--- src/rose/rose_build_bytecode.cpp | 35 +++++++-------- src/rose/rose_build_dump.cpp | 2 + src/smallwrite/smallwrite_build.cpp | 12 ++--- 16 files changed, 112 insertions(+), 198 deletions(-) delete mode 100644 src/nfa/goughcompile_util.cpp delete mode 100644 src/nfa/goughcompile_util.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a416405b..a4973c026 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -593,8 +593,6 @@ SET (hs_SRCS src/nfa/goughcompile_accel.cpp src/nfa/goughcompile_internal.h src/nfa/goughcompile_reg.cpp - src/nfa/goughcompile_util.cpp - 
src/nfa/goughcompile_util.h src/nfa/mcclellan.h src/nfa/mcclellan_internal.h src/nfa/mcclellancompile.cpp diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index b75e0463e..647dc496e 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -41,8 +41,9 @@ #include "util/graph_range.h" #include "util/make_unique.h" #include "util/order_check.h" -#include "util/verify_types.h" +#include "util/report_manager.h" #include "util/ue2_containers.h" +#include "util/verify_types.h" #include "ue2common.h" @@ -77,9 +78,10 @@ namespace { class gough_build_strat : public mcclellan_build_strat { public: - gough_build_strat(raw_som_dfa &r, const GoughGraph &g, - const map &accel_info) - : mcclellan_build_strat(r), rdfa(r), gg(g), + gough_build_strat( + raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm, + const map &accel_info) + : mcclellan_build_strat(r, rm), rdfa(r), gg(g), accel_gough_info(accel_info) {} unique_ptr gatherReports(vector &reports /* out */, vector &reports_eod /* out */, @@ -1035,7 +1037,8 @@ void update_accel_prog_offset(const gough_build_strat &gbs, } aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc) { + const CompileContext &cc, + const ReportManager &rm) { assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8 || !cc.streaming); @@ -1067,7 +1070,7 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, map accel_allowed; find_allowed_accel_states(*cfg, blocks, &accel_allowed); - gough_build_strat gbs(raw, *cfg, accel_allowed); + gough_build_strat gbs(raw, *cfg, rm, accel_allowed); aligned_unique_ptr basic_dfa = mcclellanCompile_i(raw, gbs, cc); assert(basic_dfa); if (!basic_dfa) { @@ -1195,10 +1198,11 @@ namespace { struct raw_gough_report_list { set reports; - explicit raw_gough_report_list( - const vector> &raw_reports) { + raw_gough_report_list( + const vector> &raw_reports, + const ReportManager &rm, bool do_remap) { for (const auto &m : raw_reports) { - ReportID r = m.first; + ReportID r = do_remap ? 
rm.getProgramOffset(m.first) : m.first; u32 impl_slot = INVALID_SLOT; if (m.second) { impl_slot = m.second->slot; @@ -1227,11 +1231,13 @@ unique_ptr gough_build_strat::gatherReports( vector &reports_eod, u8 *isSingleReport, ReportID *arbReport) const { - unique_ptr ri = - ue2::make_unique(); - map rev; DEBUG_PRINTF("gathering reports\n"); + const bool remap_reports = has_managed_reports(rdfa.kind); + + auto ri = ue2::make_unique(); + map rev; + assert(!rdfa.states.empty()); vector verts(rdfa.states.size()); @@ -1250,7 +1256,7 @@ unique_ptr gough_build_strat::gatherReports( continue; } - raw_gough_report_list rrl(gg[v].reports); + raw_gough_report_list rrl(gg[v].reports, rm, remap_reports); DEBUG_PRINTF("non empty r %zu\n", reports.size()); if (rev.find(rrl) != rev.end()) { reports.push_back(rev[rrl]); @@ -1269,7 +1275,7 @@ unique_ptr gough_build_strat::gatherReports( } DEBUG_PRINTF("non empty r eod\n"); - raw_gough_report_list rrl(gg[v].reports_eod); + raw_gough_report_list rrl(gg[v].reports_eod, rm, remap_reports); if (rev.find(rrl) != rev.end()) { reports_eod.push_back(rev[rrl]); continue; diff --git a/src/nfa/goughcompile.h b/src/nfa/goughcompile.h index 9da983d45..54f98cef2 100644 --- a/src/nfa/goughcompile.h +++ b/src/nfa/goughcompile.h @@ -89,7 +89,8 @@ struct raw_som_dfa : public raw_dfa { }; aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc); + const CompileContext &cc, + const ReportManager &rm); } // namespace ue2 diff --git a/src/nfa/goughcompile_util.cpp b/src/nfa/goughcompile_util.cpp deleted file mode 100644 index 33030131c..000000000 --- a/src/nfa/goughcompile_util.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
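The thread running through patch 162's strategy changes: instead of each caller pre-remapping a copied automaton, DFA construction itself consults the automaton's kind via has_managed_reports() to decide whether gatherReports() should translate ReportIDs into program offsets; the new NFA_OUTFIX_RAW kind is documented as carrying unmanaged reports, i.e. ones that skip this translation. A toy sketch of the conditional-remap constructor shape used by raw_report_list and raw_gough_report_list; names mirror the patch but the types (and the fake offset computation) are simplifications, not the real code:

    #include <set>

    using ReportID = unsigned;

    struct RegistryLike { // stand-in for ReportManager
        unsigned getProgramOffset(ReportID id) const {
            return id * 8; // fake mapping, for illustration only
        }
    };

    struct report_list_like {
        std::set<ReportID> reports;
        report_list_like(const std::set<ReportID> &in, const RegistryLike &rm,
                         bool do_remap) {
            if (do_remap) {
                for (ReportID id : in) {
                    reports.insert(rm.getProgramOffset(id));
                }
            } else {
                reports = in; // raw kinds keep their original report ids
            }
        }
    };

Pushing the decision into the build strategy removes the copy-then-rewrite passes that patch 161 scattered across call sites, which is why goughcompile_util.cpp/.h can be deleted here.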
- */ - -#include "goughcompile.h" -#include "goughcompile_util.h" -#include "mcclellancompile_util.h" -#include "util/report_manager.h" - -#include "ue2common.h" - -using namespace std; -using namespace ue2; - -namespace ue2 { - -static -void remapReportsToPrograms(set &reports, - const ReportManager &rm) { - if (reports.empty()) { - return; - } - auto old_reports = reports; - reports.clear(); - for (const auto &r : old_reports) { - u32 program = rm.getProgramOffset(r.report); - reports.emplace(program, r.slot); - } -} - -void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm) { - DEBUG_PRINTF("remap haig reports\n"); - - for (auto &ds : haig.state_som) { - remapReportsToPrograms(ds.reports, rm); - remapReportsToPrograms(ds.reports_eod, rm); - } - - // McClellan-style reports too. - raw_dfa &rdfa = haig; - remapReportsToPrograms(rdfa, rm); -} - -} // namespace ue2 diff --git a/src/nfa/goughcompile_util.h b/src/nfa/goughcompile_util.h deleted file mode 100644 index 05c9d90d0..000000000 --- a/src/nfa/goughcompile_util.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef GOUGHCOMPILE_UTIL_H -#define GOUGHCOMPILE_UTIL_H - -namespace ue2 { - -struct raw_som_dfa; -class ReportManager; - -void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm); - -} // namespace ue2 - -#endif // GOUGHCOMPILE_UTIL_H diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 8bc0b9d86..a9fbce94c 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -46,6 +46,7 @@ #include "util/container.h" #include "util/make_unique.h" #include "util/order_check.h" +#include "util/report_manager.h" #include "util/ue2_containers.h" #include "util/unaligned.h" #include "util/verify_types.h" @@ -356,8 +357,16 @@ namespace { struct raw_report_list { flat_set reports; - explicit raw_report_list(const flat_set &reports_in) - : reports(reports_in) {} + raw_report_list(const flat_set &reports_in, + const ReportManager &rm, bool do_remap) { + if (do_remap) { + for (auto &id : reports_in) { + reports.insert(rm.getProgramOffset(id)); + } + } else { + reports = reports_in; + } + } bool operator<(const raw_report_list &b) const { return reports < b.reports; @@ -380,6 +389,8 @@ unique_ptr mcclellan_build_strat::gatherReports( ReportID *arbReport) const { DEBUG_PRINTF("gathering reports\n"); + const bool remap_reports = has_managed_reports(rdfa.kind); + auto ri = ue2::make_unique(); map rev; @@ -389,7 +400,7 @@ unique_ptr mcclellan_build_strat::gatherReports( continue; } - raw_report_list rrl(s.reports); + raw_report_list rrl(s.reports, rm, remap_reports); DEBUG_PRINTF("non empty r\n"); if (rev.find(rrl) != rev.end()) { reports.push_back(rev[rrl]); @@ -408,7 +419,7 @@ unique_ptr mcclellan_build_strat::gatherReports( } DEBUG_PRINTF("non empty r eod\n"); - raw_report_list rrl(s.reports_eod); + raw_report_list rrl(s.reports_eod, rm, remap_reports); if (rev.find(rrl) != rev.end()) { reports_eod.push_back(rev[rrl]); continue; @@ -579,8 +590,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, return nullptr; } - unique_ptr ri - = info.strat.gatherReports(reports, reports_eod, &single, &arb); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map accel_escape_info = populateAccelerationInfo(info.raw, info.strat, cc.grey); @@ -799,8 +809,7 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, ReportID arb; u8 single; - unique_ptr ri - = info.strat.gatherReports(reports, reports_eod, &single, &arb); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map accel_escape_info = populateAccelerationInfo(info.raw, info.strat, cc.grey); @@ -1086,8 +1095,9 @@ aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, } aligned_unique_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, set *accel_states) { - mcclellan_build_strat mbs(raw); + mcclellan_build_strat mbs(raw, rm); return mcclellanCompile_i(raw, mbs, cc, accel_states); } diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index 781e262da..ba519cac0 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -44,6 +44,7 @@ struct NFA; namespace ue2 { +class ReportManager; struct CompileContext; struct raw_report_info { @@ -57,6 +58,7 @@ struct raw_report_info { class dfa_build_strat { public: + explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {} virtual ~dfa_build_strat(); virtual raw_dfa &get_raw() const = 0; virtual std::unique_ptr gatherReports( @@ -68,11 +70,14 @@ class dfa_build_strat { virtual size_t accelSize(void) const = 0; 
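/*
 * An illustrative aside on the change above (a sketch with a hypothetical
 * helper name, not code from the patch): whether stored reports are
 * ReportManager program offsets is now decided once, from the engine kind,
 * while reports are gathered -- rather than by rewriting a copy of the raw
 * DFA afterwards.
 */
static
flat_set<ReportID> gather_remapped(const flat_set<ReportID> &reports_in,
                                   const ReportManager &rm, nfa_kind kind) {
    flat_set<ReportID> out;
    if (has_managed_reports(kind)) {
        // Managed kinds (suffixes, outfixes) store program offsets.
        for (const ReportID &id : reports_in) {
            out.insert(rm.getProgramOffset(id));
        }
    } else {
        // Unmanaged kinds (e.g. NFA_OUTFIX_RAW) keep raw ReportIDs.
        out = reports_in;
    }
    return out;
}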
     virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
                             void *accel_out) = 0;
+protected:
+    const ReportManager &rm;
 };
 
 class mcclellan_build_strat : public dfa_build_strat {
 public:
-    explicit mcclellan_build_strat(raw_dfa &r) : rdfa(r) {}
+    mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
+        : dfa_build_strat(rm_in), rdfa(rdfa_in) {}
     raw_dfa &get_raw() const override { return rdfa; }
     std::unique_ptr<raw_report_info> gatherReports(
         std::vector<u32> &reports /* out */,
@@ -93,6 +98,7 @@ class mcclellan_build_strat : public dfa_build_strat {
  * states */
 ue2::aligned_unique_ptr<NFA>
 mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
+                 const ReportManager &rm,
                  std::set<dstate_id_t> *accel_states = nullptr);
 
 /* used internally by mcclellan/haig/gough compile process */
diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp
index d05a67760..234574d84 100644
--- a/src/nfa/mcclellancompile_util.cpp
+++ b/src/nfa/mcclellancompile_util.cpp
@@ -395,27 +395,4 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
     }
 }
 
-static
-void remapReportsToPrograms(flat_set<ReportID> &reports,
-                            const ReportManager &rm) {
-    if (reports.empty()) {
-        return;
-    }
-    auto old_reports = reports;
-    reports.clear();
-    for (const ReportID &id : old_reports) {
-        u32 program = rm.getProgramOffset(id);
-        reports.insert(program);
-    }
-}
-
-void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm) {
-    DEBUG_PRINTF("remap dfa reports\n");
-    for (auto &ds : rdfa.states) {
-        remapReportsToPrograms(ds.reports, rm);
-        remapReportsToPrograms(ds.reports_eod, rm);
-    }
-}
-
-
 } // namespace ue2
diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h
index e8bfd4a19..7b6c033a9 100644
--- a/src/nfa/mcclellancompile_util.h
+++ b/src/nfa/mcclellancompile_util.h
@@ -31,7 +31,6 @@
 
 #include "rdfa.h"
 #include "ue2common.h"
-#include "util/report_manager.h"
 
 #include <set>
 
@@ -58,8 +57,6 @@ size_t hash_dfa(const raw_dfa &rdfa);
 
 dstate_id_t get_sds_or_proxy(const raw_dfa &raw);
 
-void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm);
-
 } // namespace ue2
 
 #endif
diff --git a/src/nfa/nfa_kind.h b/src/nfa/nfa_kind.h
index f13e3770d..128698a4c 100644
--- a/src/nfa/nfa_kind.h
+++ b/src/nfa/nfa_kind.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -39,17 +39,44 @@ enum nfa_kind {
     NFA_INFIX,  //!< rose infix
     NFA_SUFFIX, //!< rose suffix
     NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
+    NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
     NFA_REV_PREFIX, //!< reverse running prefixes (for som)
 };
 
-static UNUSED
+inline
 bool is_triggered(enum nfa_kind k) {
-    return k == NFA_INFIX || k == NFA_SUFFIX || k == NFA_REV_PREFIX;
+    switch (k) {
+    case NFA_INFIX:
+    case NFA_SUFFIX:
+    case NFA_REV_PREFIX:
+        return true;
+    default:
+        return false;
+    }
 }
 
-static UNUSED
+inline
 bool generates_callbacks(enum nfa_kind k) {
-    return k == NFA_SUFFIX || k == NFA_OUTFIX || k == NFA_REV_PREFIX;
+    switch (k) {
+    case NFA_SUFFIX:
+    case NFA_OUTFIX:
+    case NFA_OUTFIX_RAW:
+    case NFA_REV_PREFIX:
+        return true;
+    default:
+        return false;
+    }
+}
+
+inline
+bool has_managed_reports(enum nfa_kind k) {
+    switch (k) {
+    case NFA_SUFFIX:
+    case NFA_OUTFIX:
+        return true;
+    default:
+        return false;
+    }
 }
 
 } // namespace ue2
diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp
index 3a66b2fad..42157e1eb 100644
--- a/src/nfagraph/ng_split.cpp
+++ b/src/nfagraph/ng_split.cpp
@@ -113,6 +113,7 @@ void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
         lhs->kind = NFA_INFIX;
         break;
     case NFA_REV_PREFIX:
+    case NFA_OUTFIX_RAW:
         assert(0);
         break;
     }
@@ -154,6 +155,7 @@ void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
         rhs->kind = NFA_SUFFIX;
         break;
     case NFA_REV_PREFIX:
+    case NFA_OUTFIX_RAW:
         assert(0);
         break;
     }
diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp
index a0ccb7ae1..23c122a75 100644
--- a/src/rose/rose_build_add.cpp
+++ b/src/rose/rose_build_add.cpp
@@ -1039,6 +1039,7 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h,
         }
         break;
     case NFA_REV_PREFIX:
+    case NFA_OUTFIX_RAW:
         DEBUG_PRINTF("kind %u\n", (u32)h.kind);
         assert(0);
     }
diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp
index 805dc920e..35ff7138b 100644
--- a/src/rose/rose_build_anchored.cpp
+++ b/src/rose/rose_build_anchored.cpp
@@ -660,7 +660,7 @@ int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) {
     Automaton_Holder autom(h);
 
-    unique_ptr<raw_dfa> out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
+    unique_ptr<raw_dfa> out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
     if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
         return finalise_out(tbi, h, autom, move(out_dfa), remap);
     }
@@ -721,7 +721,7 @@ void buildSimpleDfas(const RoseBuildImpl &tbi,
         NGHolder h;
         populate_holder(simple.first, exit_ids, &h);
         Automaton_Holder autom(h);
-        unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
+        unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
         UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
         assert(!rv);
         rdfa->start_anchored = INIT_STATE;
@@ -771,7 +771,8 @@ vector> getAnchoredDfas(RoseBuildImpl &build) {
 static
 size_t buildNfas(vector<raw_dfa> &anchored_dfas,
                  vector<aligned_unique_ptr<NFA>> *nfas,
-                 vector<u32> *start_offset, const CompileContext &cc) {
+                 vector<u32> *start_offset, const CompileContext &cc,
+                 const ReportManager &rm) {
     const size_t num_dfas = anchored_dfas.size();
 
     nfas->reserve(num_dfas);
@@ -785,7 +786,7 @@ size_t buildNfas(vector<raw_dfa> &anchored_dfas,
 
         minimize_hopcroft(rdfa, cc.grey);
 
-        auto nfa = mcclellanCompile(rdfa, cc);
+        auto nfa = mcclellanCompile(rdfa, cc, rm);
         if (!nfa) {
             assert(0);
             throw std::bad_alloc();
         }
@@ -836,7 +837,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
     vector<aligned_unique_ptr<NFA>> nfas;
     vector<u32> start_offset; // start offset for each dfa (dots removed)
 
-    size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc);
+    size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm);
 
     if (total_size > cc.grey.limitRoseAnchoredSize) {
         throw ResourceLimitError();
     }
diff --git a/src/rose/rose_build_bytecode.cpp
b/src/rose/rose_build_bytecode.cpp index 0454c83b9..5db24b3b8 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -43,7 +43,6 @@ #include "hwlm/hwlm.h" /* engine types */ #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" -#include "nfa/goughcompile_util.h" #include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile_util.h" #include "nfa/nfa_api_queue.h" @@ -895,17 +894,13 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, } if (suff.haig()) { - auto remapped_haig = *suff.haig(); - remapReportsToPrograms(remapped_haig, rm); - auto n = goughCompile(remapped_haig, ssm.somPrecision(), cc); + auto n = goughCompile(*suff.haig(), ssm.somPrecision(), cc, rm); assert(n); return n; } if (suff.dfa()) { - auto remapped_rdfa = *suff.dfa(); - remapReportsToPrograms(remapped_rdfa, rm); - auto d = mcclellanCompile(remapped_rdfa, cc); + auto d = mcclellanCompile(*suff.dfa(), cc, rm); assert(d); return d; } @@ -936,8 +931,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { - remapReportsToPrograms(*rdfa, rm); - auto d = mcclellanCompile(*rdfa, cc); + auto d = mcclellanCompile(*rdfa, cc, rm); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { n = pickImpl(move(d), move(n)); @@ -1024,6 +1018,8 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, const bool is_prefix, const bool is_transient, const map > &infixTriggers, const CompileContext &cc) { + const ReportManager &rm = tbi.rm; + aligned_unique_ptr n; // Should compress state if this rose is non-transient and we're in @@ -1054,12 +1050,12 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, } if (left.dfa()) { - n = mcclellanCompile(*left.dfa(), cc); + n = mcclellanCompile(*left.dfa(), cc, rm); } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix && !is_transient) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - n = mcclellanCompile(*rdfa, cc); + n = mcclellanCompile(*rdfa, cc, rm); } } @@ -1083,7 +1079,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, && (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc); + auto d = mcclellanCompile(*rdfa, cc, rm); assert(d); n = pickImpl(move(d), move(n)); } @@ -1115,6 +1111,7 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, build_context &bc, bool do_prefix) { const RoseGraph &g = tbi.g; const CompileContext &cc = tbi.cc; + const ReportManager &rm = tbi.rm; ue2::unordered_map seen; // already built queue indices @@ -1165,7 +1162,8 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, build_context &bc, // Need to build NFA, which is either predestined to be a Haig (in // SOM mode) or could be all manner of things. if (leftfix.haig()) { - nfa = goughCompile(*leftfix.haig(), tbi.ssm.somPrecision(), cc); + nfa = goughCompile(*leftfix.haig(), tbi.ssm.somPrecision(), cc, + rm); } else { assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v)); nfa = makeLeftNfa(tbi, leftfix, is_prefix, is_transient, @@ -1278,16 +1276,13 @@ class OutfixBuilder : public boost::static_visitor> { aligned_unique_ptr operator()(unique_ptr &rdfa) const { // Unleash the McClellan! 
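/*
 * A condensed sketch of the calling-convention change in this hunk,
 * assuming a raw_dfa rdfa and the build's ReportManager rm are in scope:
 *
 *   // before: clone the DFA, rewrite its reports, then compile
 *   raw_dfa tmp(*rdfa);
 *   remapReportsToPrograms(tmp, rm);
 *   auto nfa = mcclellanCompile(tmp, cc);
 *
 *   // after: compile directly; the build strategy remaps internally
 *   // whenever has_managed_reports(rdfa->kind) holds
 *   auto nfa = mcclellanCompile(*rdfa, cc, rm);
 *
 * This drops a full copy of the DFA from every such call site.
 */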
- raw_dfa tmp(*rdfa); - remapReportsToPrograms(tmp, build.rm); - return mcclellanCompile(tmp, build.cc); + return mcclellanCompile(*rdfa, build.cc, build.rm); } aligned_unique_ptr operator()(unique_ptr &haig) const { // Unleash the Goughfish! - raw_som_dfa tmp(*haig); - remapReportsToPrograms(tmp, build.rm); - return goughCompile(tmp, build.ssm.somPrecision(), build.cc); + return goughCompile(*haig, build.ssm.somPrecision(), build.cc, + build.rm); } aligned_unique_ptr operator()(unique_ptr &holder) const { @@ -1309,7 +1304,7 @@ class OutfixBuilder : public boost::static_visitor> { !has_bounded_repeats_other_than_firsts(*n)) { auto rdfa = buildMcClellan(h, &rm, cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc); + auto d = mcclellanCompile(*rdfa, cc, rm); if (d) { n = pickImpl(move(d), move(n)); } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index e56f322b3..079dd5568 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -73,6 +73,8 @@ string to_string(nfa_kind k) { return "o"; case NFA_REV_PREFIX: return "r"; + case NFA_OUTFIX_RAW: + return "O"; } assert(0); return "?"; diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index a3fe43d41..792a3d5b3 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -303,14 +303,15 @@ bool is_slow(const raw_dfa &rdfa, const set &accel, static aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, - const CompileContext &cc, u32 *start_offset, + const CompileContext &cc, + const ReportManager &rm, u32 *start_offset, u32 *small_region) { *start_offset = remove_leading_dots(rdfa); // Unleash the McClellan! set accel_states; - auto nfa = mcclellanCompile(rdfa, cc, &accel_states); + auto nfa = mcclellanCompile(rdfa, cc, rm, &accel_states); if (!nfa) { DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); return nullptr; @@ -328,7 +329,7 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, return nullptr; } - nfa = mcclellanCompile(rdfa, cc, &accel_states); + nfa = mcclellanCompile(rdfa, cc, rm, &accel_states); if (!nfa) { DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); assert(0); /* able to build orig dfa but not the trimmed? 
*/ @@ -376,11 +377,10 @@ SmallWriteBuildImpl::build(u32 roseQuality) { DEBUG_PRINTF("building rdfa %p\n", rdfa.get()); - remapReportsToPrograms(*rdfa, rm); - u32 start_offset; u32 small_region; - auto nfa = prepEngine(*rdfa, roseQuality, cc, &start_offset, &small_region); + auto nfa = + prepEngine(*rdfa, roseQuality, cc, rm, &start_offset, &small_region); if (!nfa) { DEBUG_PRINTF("some smallwrite outfix could not be prepped\n"); /* just skip the smallwrite optimization */ From ec985a62f82b37cd41212f2df52d1256a8e1492c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 19 Apr 2016 13:51:41 +1000 Subject: [PATCH 163/218] castle: add nfa kind to CastleProto --- src/nfa/castlecompile.cpp | 7 ++++--- src/nfa/castlecompile.h | 7 +++++-- src/rose/rose_build_castle.cpp | 10 ++++------ 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 3d99690a3..c05cd9591 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -680,7 +680,7 @@ depth findMaxWidth(const CastleProto &proto, u32 top) { return proto.repeats.at(top).bounds.max; } -CastleProto::CastleProto(const PureRepeat &pr) { +CastleProto::CastleProto(nfa_kind k, const PureRepeat &pr) : kind(k) { assert(pr.reach.any()); assert(pr.reports.size() == 1); u32 top = 0; @@ -742,6 +742,7 @@ u32 CastleProto::merge(const PureRepeat &pr) { bool mergeCastle(CastleProto &c1, const CastleProto &c2, map &top_map) { assert(&c1 != &c2); + assert(c1.kind == c2.kind); DEBUG_PRINTF("c1 has %zu repeats, c2 has %zu repeats\n", c1.repeats.size(), c2.repeats.size()); @@ -954,7 +955,7 @@ bool hasZeroMinBound(const CastleProto &proto) { return false; } -unique_ptr makeHolder(const CastleProto &proto, nfa_kind kind, +unique_ptr makeHolder(const CastleProto &proto, const CompileContext &cc) { assert(!proto.repeats.empty()); @@ -967,7 +968,7 @@ unique_ptr makeHolder(const CastleProto &proto, nfa_kind kind, } } - unique_ptr g = ue2::make_unique(kind); + auto g = ue2::make_unique(proto.kind); for (const auto &m : proto.repeats) { if (m.first >= NFA_MAX_TOP_MASKS) { diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index 73c753268..1019fb90b 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -66,7 +66,7 @@ struct CompileContext; */ struct CastleProto { static constexpr size_t max_occupancy = 65536; // arbitrary limit - explicit CastleProto(const PureRepeat &pr); + CastleProto(nfa_kind k, const PureRepeat &pr); const CharReach &reach() const; /** \brief Add a new repeat. */ @@ -95,6 +95,9 @@ struct CastleProto { * so we track this explicitly instead of using repeats.size(). */ u32 next_top = 1; + + /** \brief Kind for this engine. */ + nfa_kind kind; }; std::set all_reports(const CastleProto &proto); @@ -156,7 +159,7 @@ bool requiresDedupe(const CastleProto &proto, /** * \brief Build an NGHolder from a CastleProto. 
*/ -std::unique_ptr makeHolder(const CastleProto &castle, nfa_kind kind, +std::unique_ptr makeHolder(const CastleProto &castle, const CompileContext &cc); void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm); diff --git a/src/rose/rose_build_castle.cpp b/src/rose/rose_build_castle.cpp index 83c69e708..c65e840d8 100644 --- a/src/rose/rose_build_castle.cpp +++ b/src/rose/rose_build_castle.cpp @@ -77,7 +77,7 @@ void makeCastle(LeftEngInfo &left, if (isPureRepeat(h, pr) && pr.reports.size() == 1) { DEBUG_PRINTF("vertex preceded by infix repeat %s\n", pr.bounds.str().c_str()); - left.castle = make_shared(pr); + left.castle = make_shared(h.kind, pr); cache[&h] = left.castle; left.graph.reset(); } @@ -119,7 +119,7 @@ void makeCastleSuffix(RoseBuildImpl &tbi, RoseVertex v, return; } - suffix.castle = make_shared(pr); + suffix.castle = make_shared(h.kind, pr); cache[&h] = suffix.castle; suffix.graph.reset(); } @@ -264,8 +264,7 @@ bool unmakeCastles(RoseBuildImpl &tbi) { for (const auto &e : left_castles) { assert(e.first.castle()); - shared_ptr h = makeHolder(*e.first.castle(), NFA_INFIX, - tbi.cc); + shared_ptr h = makeHolder(*e.first.castle(), tbi.cc); if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { continue; } @@ -281,8 +280,7 @@ bool unmakeCastles(RoseBuildImpl &tbi) { for (const auto &e : suffix_castles) { assert(e.first.castle()); - shared_ptr h = makeHolder(*e.first.castle(), NFA_SUFFIX, - tbi.cc); + shared_ptr h = makeHolder(*e.first.castle(), tbi.cc); if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { continue; } From c101beb541a30d04148abb37128aa88852a486aa Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 19 Apr 2016 14:42:45 +1000 Subject: [PATCH 164/218] castle, lbr: native report remap --- src/nfa/castle.c | 3 ++- src/nfa/castlecompile.cpp | 40 +++++++++----------------------- src/nfa/castlecompile.h | 4 +--- src/nfagraph/ng_lbr.cpp | 29 ++++++++++++++++++----- src/nfagraph/ng_lbr.h | 14 +++++------ src/rose/rose_build_bytecode.cpp | 18 ++++++-------- unit/internal/lbr.cpp | 13 +++++++---- 7 files changed, 59 insertions(+), 62 deletions(-) diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 5558381d5..13a44a97c 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -96,7 +96,8 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q, repeatHasMatch(info, rctrl, rstate, offset); DEBUG_PRINTF("repeatHasMatch returned %d\n", match); if (match == REPEAT_MATCH) { - DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, subIdx); + DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset, + subIdx, sub->report); if (q->cb(offset, sub->report, q->context) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index c05cd9591..4bddf767a 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -344,11 +344,14 @@ void buildSubcastles(const CastleProto &proto, vector &subs, u32 &scratchStateSize, u32 &streamStateSize, u32 &tableSize, vector &tables, u32 &sparseRepeats, const ExclusiveInfo &exclusiveInfo, - vector &may_stale) { + vector &may_stale, const ReportManager &rm) { + const bool remap_reports = has_managed_reports(proto.kind); + u32 i = 0; const auto &groupId = exclusiveInfo.groupId; const auto &numGroups = exclusiveInfo.numGroups; vector maxStreamSize(numGroups, 0); + for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); it != ite; ++it, ++i) { const PureRepeat &pr = it->second; @@ -400,7 +403,9 @@ void buildSubcastles(const CastleProto 
&proto, vector &subs, info.encodingSize = rsi.encodingSize; info.patchesOffset = rsi.patchesOffset; - sub.report = *pr.reports.begin(); + assert(pr.reports.size() == 1); + ReportID id = *pr.reports.begin(); + sub.report = remap_reports ? rm.getProgramOffset(id) : id; if (rtype == REPEAT_SPARSE_OPTIMAL_P) { for (u32 j = 0; j < rsi.patchSize; j++) { @@ -435,7 +440,7 @@ void buildSubcastles(const CastleProto &proto, vector &subs, aligned_unique_ptr buildCastle(const CastleProto &proto, const map>> &triggers, - const CompileContext &cc) { + const CompileContext &cc, const ReportManager &rm) { assert(cc.grey.allowCastle); const size_t numRepeats = proto.repeats.size(); @@ -548,7 +553,7 @@ buildCastle(const CastleProto &proto, buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair, scratchStateSize, streamStateSize, tableSize, - tables, sparseRepeats, exclusiveInfo, may_stale); + tables, sparseRepeats, exclusiveInfo, may_stale, rm); DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size()); vector stale_iter; @@ -816,6 +821,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, ReportID report2) { assert(!c1.repeats.empty()); assert(!c2.repeats.empty()); + assert(c1.kind == c2.kind); if (c1.reach() != c2.reach()) { DEBUG_PRINTF("different reach\n"); @@ -862,6 +868,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, bool is_equal(const CastleProto &c1, const CastleProto &c2) { assert(!c1.repeats.empty()); assert(!c2.repeats.empty()); + assert(c1.kind == c2.kind); if (c1.reach() != c2.reach()) { DEBUG_PRINTF("different reach\n"); @@ -992,29 +999,4 @@ unique_ptr makeHolder(const CastleProto &proto, return g; } -static -void remapReportsToPrograms(PureRepeat &pr, const ReportManager &rm) { - if (pr.reports.empty()) { - return; - } - auto old_reports = pr.reports; - pr.reports.clear(); - for (const auto &r : old_reports) { - pr.reports.insert(rm.getProgramOffset(r)); - } -} - -void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm) { - for (auto &m : castle.repeats) { - remapReportsToPrograms(m.second, rm); - } - - auto old_report_map = castle.report_map; - castle.report_map.clear(); - for (auto &m : old_report_map) { - u32 program = rm.getProgramOffset(m.first); - castle.report_map[program].insert(begin(m.second), end(m.second)); - } -} - } // namespace ue2 diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index 1019fb90b..938e57c4d 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -123,7 +123,7 @@ void remapCastleTops(CastleProto &proto, std::map &top_map); ue2::aligned_unique_ptr buildCastle(const CastleProto &proto, const std::map>> &triggers, - const CompileContext &cc); + const CompileContext &cc, const ReportManager &rm); /** * \brief Merge two CastleProto prototypes together, if possible. 
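/*
 * A minimal sketch (hypothetical helper, not code from the patch) of the
 * invariant these changes introduce: castles may only merge when their
 * kinds agree, and the holder built from a proto now inherits proto.kind
 * instead of taking a separate kind parameter.
 */
static
std::unique_ptr<NGHolder> merge_then_unmake(CastleProto &c1,
                                            const CastleProto &c2,
                                            const CompileContext &cc) {
    assert(c1.kind == c2.kind); // mergeCastle itself asserts this
    std::map<u32, u32> top_map; // maps c2's tops to their ids in c1
    if (!mergeCastle(c1, c2, top_map)) {
        return nullptr; // repeats were not compatible
    }
    return makeHolder(c1, cc); // resulting graph has kind c1.kind
}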
@@ -162,8 +162,6 @@ bool requiresDedupe(const CastleProto &proto, std::unique_ptr makeHolder(const CastleProto &castle, const CompileContext &cc); -void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm); - } // namespace ue2 #endif // NFA_CASTLECOMPILE_H diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index b9cacaa75..d7183817d 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -36,17 +36,19 @@ #include "ng_holder.h" #include "ng_repeat.h" #include "ng_reports.h" -#include "nfa/shufticompile.h" -#include "nfa/trufflecompile.h" +#include "nfa/castlecompile.h" #include "nfa/lbr_internal.h" #include "nfa/nfa_internal.h" #include "nfa/repeatcompile.h" +#include "nfa/shufticompile.h" +#include "nfa/trufflecompile.h" #include "util/alloc.h" #include "util/bitutils.h" // for lg2 #include "util/compile_context.h" #include "util/container.h" #include "util/depth.h" #include "util/dump_charclass.h" +#include "util/report_manager.h" #include "util/verify_types.h" using namespace std; @@ -294,13 +296,19 @@ aligned_unique_ptr constructLBR(const CharReach &cr, return nfa; } -aligned_unique_ptr constructLBR(const PureRepeat &repeat, +aligned_unique_ptr constructLBR(const CastleProto &proto, const vector> &triggers, - const CompileContext &cc) { + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } + if (proto.repeats.size() != 1) { + return nullptr; + } + + const PureRepeat &repeat = proto.repeats.begin()->second; assert(!repeat.reach.none()); if (repeat.reports.size() != 1) { @@ -317,6 +325,9 @@ aligned_unique_ptr constructLBR(const PureRepeat &repeat, } ReportID report = *repeat.reports.begin(); + if (has_managed_reports(proto.kind)) { + report = rm.getProgramOffset(report); + } DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str()); return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max, @@ -326,7 +337,8 @@ aligned_unique_ptr constructLBR(const PureRepeat &repeat, /** \brief Construct an LBR engine from the given graph \p g. */ aligned_unique_ptr constructLBR(const NGHolder &g, const vector> &triggers, - const CompileContext &cc) { + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } @@ -335,8 +347,13 @@ aligned_unique_ptr constructLBR(const NGHolder &g, if (!isPureRepeat(g, repeat)) { return nullptr; } + if (repeat.reports.size() != 1) { + DEBUG_PRINTF("too many reports\n"); + return nullptr; + } - return constructLBR(repeat, triggers, cc); + CastleProto proto(g.kind, repeat); + return constructLBR(proto, triggers, cc, rm); } /** \brief True if graph \p g could be turned into an LBR engine. */ diff --git a/src/nfagraph/ng_lbr.h b/src/nfagraph/ng_lbr.h index fad079ae4..99cb0fcb0 100644 --- a/src/nfagraph/ng_lbr.h +++ b/src/nfagraph/ng_lbr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,22 +46,22 @@ namespace ue2 { class CharReach; class NGHolder; class ReportManager; +struct CastleProto; struct CompileContext; -struct DepthMinMax; struct Grey; -struct PureRepeat; /** \brief Construct an LBR engine from the given graph \p g. 
*/ aligned_unique_ptr constructLBR(const NGHolder &g, const std::vector> &triggers, - const CompileContext &cc); + const CompileContext &cc, const ReportManager &rm); -/** \brief Construct an LBR engine from the given PureRepeat. */ +/** \brief Construct an LBR engine from the given CastleProto, which should + * contain only one repeat. */ aligned_unique_ptr -constructLBR(const PureRepeat &repeat, +constructLBR(const CastleProto &proto, const std::vector> &triggers, - const CompileContext &cc); + const CompileContext &cc, const ReportManager &rm); /** \brief True if graph \p g could be turned into an LBR engine. */ bool isLBR(const NGHolder &g, const Grey &grey); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 5db24b3b8..bcf91feaa 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -866,14 +866,14 @@ static aligned_unique_ptr buildRepeatEngine(const CastleProto &proto, const map>> &triggers, - const CompileContext &cc) { + const CompileContext &cc, const ReportManager &rm) { // If we only have one repeat, the LBR should always be the best possible // implementation. if (proto.repeats.size() == 1 && cc.grey.allowLbr) { - return constructLBR(proto.repeats.begin()->second, triggers.at(0), cc); + return constructLBR(proto, triggers.at(0), cc, rm); } - aligned_unique_ptr castle_nfa = buildCastle(proto, triggers, cc); + auto castle_nfa = buildCastle(proto, triggers, cc, rm); assert(castle_nfa); // Should always be constructible. return castle_nfa; } @@ -886,9 +886,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, const map>> &triggers, suffix_id suff, const CompileContext &cc) { if (suff.castle()) { - auto remapped_castle = *suff.castle(); - remapReportsToPrograms(remapped_castle, rm); - auto n = buildRepeatEngine(remapped_castle, triggers, cc); + auto n = buildRepeatEngine(*suff.castle(), triggers, cc, rm); assert(n); return n; } @@ -913,9 +911,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, // Take a shot at the LBR engine. if (oneTop) { - auto remapped_holder = cloneHolder(holder); - remapReportsToPrograms(*remapped_holder, rm); - auto lbr = constructLBR(*remapped_holder, triggers.at(0), cc); + auto lbr = constructLBR(holder, triggers.at(0), cc, rm); if (lbr) { return lbr; } @@ -1044,7 +1040,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, assert(!is_prefix); map > > triggers; findTriggerSequences(tbi, infixTriggers.at(left), &triggers); - n = buildRepeatEngine(*left.castle(), triggers, cc); + n = buildRepeatEngine(*left.castle(), triggers, cc, rm); assert(n); return n; // Castles/LBRs are always best! } @@ -1064,7 +1060,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, map > > triggers; findTriggerSequences(tbi, infixTriggers.at(left), &triggers); assert(contains(triggers, 0)); // single top - n = constructLBR(*left.graph(), triggers[0], cc); + n = constructLBR(*left.graph(), triggers[0], cc, rm); } if (!n && left.graph()) { diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index bb180e5fe..2bb359df9 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -49,6 +49,8 @@ using namespace std; using namespace testing; using namespace ue2; +static constexpr u32 MATCH_REPORT = 1024; + struct LbrTestParams { CharReach reach; u32 min; @@ -98,10 +100,11 @@ class LbrTest : public TestWithParam { ASSERT_TRUE(isLBR(*g, grey)); - vector > triggers; - triggers.push_back(vector()); - triggers.back().push_back(CharReach::dot()); /* lbr triggered by . 
*/ - nfa = constructLBR(*g, triggers, cc); + rm.setProgramOffset(0, MATCH_REPORT); + + /* LBR triggered by dot */ + vector> triggers = {{CharReach::dot()}}; + nfa = constructLBR(*g, triggers, cc, rm); ASSERT_TRUE(nfa != nullptr); full_state = aligned_zmalloc_unique(nfa->scratchStateSize); @@ -247,7 +250,7 @@ TEST_P(LbrTest, QueueExecToMatch) { char rv = nfaQueueExecToMatch(nfa.get(), &q, end); ASSERT_EQ(MO_MATCHES_PENDING, rv); ASSERT_EQ(0, matches); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); nfaReportCurrentMatches(nfa.get(), &q); ASSERT_EQ(1, matches); } From ee7f31ac392a50cb207c653ee3a93c2af7ddecc3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 19 Apr 2016 15:22:20 +1000 Subject: [PATCH 165/218] mpv: native report remapping --- src/nfa/mpvcompile.cpp | 31 +++++++++++++++++++++---------- src/nfa/mpvcompile.h | 8 +++++--- src/rose/rose_build_bytecode.cpp | 14 +------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index 7521afef8..b024b5302 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -36,6 +36,7 @@ #include "util/alloc.h" #include "util/multibit_internal.h" #include "util/order_check.h" +#include "util/report_manager.h" #include "util/verify_types.h" #include @@ -82,13 +83,21 @@ struct ClusterKey { } // namespace static -void writePuffette(mpv_puffette *out, const raw_puff &rp) { +void writePuffette(mpv_puffette *out, const raw_puff &rp, + const ReportManager &rm) { DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded, rp.report, out); out->repeats = rp.repeats; out->unbounded = rp.unbounded; out->simple_exhaust = rp.simple_exhaust; - out->report = rp.report; + out->report = rm.getProgramOffset(rp.report); +} + +static +void writeSentinel(mpv_puffette *out) { + DEBUG_PRINTF("outputting sentinel to %p\n", out); + memset(out, 0, sizeof(*out)); + out->report = INVALID_REPORT; } static @@ -147,8 +156,8 @@ void populateClusters(const vector &puffs_in, static void writeKiloPuff(const map>::const_iterator &it, - u32 counter_offset, mpv *m, mpv_kilopuff *kp, - mpv_puffette **pa) { + const ReportManager &rm, u32 counter_offset, mpv *m, + mpv_kilopuff *kp, mpv_puffette **pa) { const CharReach &reach = it->first.reach; const vector &puffs = it->second; @@ -181,11 +190,11 @@ void writeKiloPuff(const map>::const_iterator &it, kp->puffette_offset = verify_u32((char *)*pa - (char *)m); for (size_t i = 0; i < puffs.size(); i++) { assert(!it->first.auto_restart || puffs[i].unbounded); - writePuffette(*pa + i, puffs[i]); + writePuffette(*pa + i, puffs[i], rm); } *pa += puffs.size(); - writePuffette(*pa, raw_puff(0U, false, INVALID_REPORT, CharReach())); + writeSentinel(*pa); ++*pa; writeDeadPoint(kp, puffs); @@ -300,7 +309,8 @@ const mpv_counter_info &findCounter(const vector &counters, } aligned_unique_ptr mpvCompile(const vector &puffs_in, - const vector &triggered_puffs) { + const vector &triggered_puffs, + const ReportManager &rm) { assert(!puffs_in.empty() || !triggered_puffs.empty()); u32 puffette_count = puffs_in.size() + triggered_puffs.size(); @@ -340,7 +350,7 @@ aligned_unique_ptr mpvCompile(const vector &puffs_in, + sizeof(mpv_counter_info) * counters.size()); mpv_puffette *pa = pa_base; - writePuffette(pa, raw_puff(0U, false, INVALID_REPORT, CharReach())); + writeSentinel(pa); ++pa; /* skip init sentinel */ @@ -366,8 +376,9 @@ aligned_unique_ptr mpvCompile(const vector &puffs_in, mpv_kilopuff 
*kp_begin = (mpv_kilopuff *)(m + 1); mpv_kilopuff *kp = kp_begin; for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) { - writeKiloPuff(it, findCounter(counters, kp - kp_begin).counter_offset, - m, kp, &pa); + writeKiloPuff(it, rm, + findCounter(counters, kp - kp_begin).counter_offset, m, + kp, &pa); ++kp; } assert((char *)pa == (char *)nfa.get() + len); diff --git a/src/nfa/mpvcompile.h b/src/nfa/mpvcompile.h index ff4906ee7..fb91ac64e 100644 --- a/src/nfa/mpvcompile.h +++ b/src/nfa/mpvcompile.h @@ -40,6 +40,8 @@ struct NFA; namespace ue2 { +class ReportManager; + struct raw_puff { raw_puff(u32 repeats_in, bool unbounded_in, ReportID report_in, const CharReach &reach_in, bool auto_restart_in = false, @@ -59,9 +61,9 @@ struct raw_puff { * puffs in the triggered_puffs vector are enabled when an TOP_N event is * delivered corresponding to their index in the vector */ -aligned_unique_ptr -mpvCompile(const std::vector &puffs, - const std::vector &triggered_puffs); +aligned_unique_ptr mpvCompile(const std::vector &puffs, + const std::vector &triggered_puffs, + const ReportManager &rm); } // namespace ue2 diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index bcf91feaa..30db15ffb 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1333,16 +1333,6 @@ aligned_unique_ptr buildOutfix(RoseBuildImpl &build, OutfixInfo &outfix) { return n; } -static -void remapReportsToPrograms(MpvProto &mpv, const ReportManager &rm) { - for (auto &puff : mpv.puffettes) { - puff.report = rm.getProgramOffset(puff.report); - } - for (auto &puff : mpv.triggered_puffettes) { - puff.report = rm.getProgramOffset(puff.report); - } -} - static void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, bool *mpv_as_outfix) { @@ -1365,9 +1355,7 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, } auto *mpv = mpv_outfix->mpv(); - auto tmp = *mpv; // copy - remapReportsToPrograms(tmp, tbi.rm); - auto nfa = mpvCompile(tmp.puffettes, tmp.triggered_puffettes); + auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes, tbi.rm); assert(nfa); if (!nfa) { throw CompileError("Unable to generate bytecode."); From 061068a2603ae287e876dc00f31cd918a15abf90 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 20 Apr 2016 09:47:12 +1000 Subject: [PATCH 166/218] nfa_kind: documentation --- src/nfa/nfa_kind.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/nfa/nfa_kind.h b/src/nfa/nfa_kind.h index 128698a4c..46d0bc4c1 100644 --- a/src/nfa/nfa_kind.h +++ b/src/nfa/nfa_kind.h @@ -26,6 +26,12 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/** + * \file + * \brief Data structures and helper functions used to describe the purpose of + * a particular NFA engine at build time. + */ + #ifndef NFA_KIND_H #define NFA_KIND_H @@ -43,6 +49,7 @@ enum nfa_kind { NFA_REV_PREFIX, //! reverse running prefixes (for som) }; +/** \brief True if this kind of engine is triggered by a top event. */ inline bool is_triggered(enum nfa_kind k) { switch (k) { @@ -55,6 +62,10 @@ bool is_triggered(enum nfa_kind k) { } } +/** + * \brief True if this kind of engine generates callback events when it + * enters accept states. + */ inline bool generates_callbacks(enum nfa_kind k) { switch (k) { @@ -68,6 +79,10 @@ bool generates_callbacks(enum nfa_kind k) { } } +/** + * \brief True if this kind of engine has reports that are managed by the \ref + * ReportManager. 
+ */ inline bool has_managed_reports(enum nfa_kind k) { switch (k) { From 9e0ec02ac9ae61f7160b8f71986e749e85575290 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 20 Apr 2016 12:41:02 +1000 Subject: [PATCH 167/218] rose: assert that program offset is sane --- src/rose/catchup.c | 3 --- src/rose/program_runtime.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/rose/catchup.c b/src/rose/catchup.c index d6e7860c5..dba9629e2 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -50,9 +50,6 @@ static really_inline int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, u64a som, u64a offset, ReportID id, const char from_mpv) { const u32 program = id; - assert(program > 0); - assert(program % ROSE_INSTR_MIN_ALIGN == 0); - const size_t match_len = 0; // Unused in this path. const char in_anchored = 0; const char in_catchup = 1; diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index be56bec7b..783970709 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -830,7 +830,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, char skip_mpv_catchup) { DEBUG_PRINTF("program=%u, offsets [%llu,%llu]\n", programOffset, som, end); - assert(programOffset); + assert(programOffset >= sizeof(struct RoseEngine)); assert(programOffset < t->size); const char *pc_base = getByOffset(t, programOffset); From ea7197571a686c35d857d1669ccc2b880e84ddc5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 20 Apr 2016 15:37:17 +1000 Subject: [PATCH 168/218] ng_haig: move logic into base class Move all of the Automaton logic into Automaton_Base, which is templated on its StateSet/StateMap types. --- src/nfagraph/ng_haig.cpp | 217 +++++++++++++++------------------------ 1 file changed, 85 insertions(+), 132 deletions(-) diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 4409924b6..8fe4889d2 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -111,44 +111,28 @@ void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) { } } +template class Automaton_Base { +public: + using StateSet = typename Automaton_Traits::StateSet; + using StateMap = typename Automaton_Traits::StateMap; + protected: Automaton_Base(const NGHolder &graph_in, - const flat_set &unused_in) - : graph(graph_in), unused(unused_in) { + const flat_set &unused_in, som_type som, + const vector> &triggers, + bool unordered_som) + : graph(graph_in), numStates(num_vertices(graph)), unused(unused_in), + init(Automaton_Traits::init_states(numStates)), + initDS(Automaton_Traits::init_states(numStates)), + squash(Automaton_Traits::init_states(numStates)), + accept(Automaton_Traits::init_states(numStates)), + acceptEod(Automaton_Traits::init_states(numStates)), + toppable(Automaton_Traits::init_states(numStates)), + dead(Automaton_Traits::init_states(numStates)) { calculateAlphabet(graph, alpha, unalpha, &alphasize); assert(alphasize <= ALPHABET_SIZE); - } - -public: - static bool canPrune(const flat_set &) { return false; } - - const NGHolder &graph; - const flat_set &unused; - - array alpha; - array unalpha; - u16 alphasize; - - set done_a; - set done_b; - - u16 start_anchored; - u16 start_floating; -}; -class Automaton_Big : public Automaton_Base { -public: - typedef dynamic_bitset<> StateSet; - typedef map StateMap; - - Automaton_Big(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, - const vector> &triggers, bool unordered_som) - : Automaton_Base(graph_in, unused_in), 
numStates(num_vertices(graph)), - init(numStates), initDS(numStates), squash(numStates), - accept(numStates), acceptEod(numStates), toppable(numStates), - dead(numStates) { populateInit(graph, unused, &init, &initDS, &v_by_index); populateAccepts(graph, &accept, &acceptEod); @@ -161,6 +145,8 @@ class Automaton_Big : public Automaton_Base { start_floating = DEAD_STATE; } + cr_by_index = populateCR(graph, v_by_index, alpha); + if (!unordered_som) { for (const auto &sq : findSquashers(graph, som)) { NFAVertex v = sq.first; @@ -170,16 +156,17 @@ class Automaton_Big : public Automaton_Base { } } - cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { - markToppableStarts(graph, unused, false, triggers, &toppable); + dynamic_bitset<> temp(numStates); + markToppableStarts(graph, unused, false, triggers, &temp); + toppable = Automaton_Traits::copy_states(temp, numStates); } } private: // Convert an NFAStateSet (as used by the squash code) into a StateSet. StateSet shrinkStateSet(const NFAStateSet &in) const { - StateSet out(dead.size()); + StateSet out = Automaton_Traits::init_states(numStates); for (size_t i = in.find_first(); i != in.npos && i < out.size(); i = in.find_next(i)) { out.set(i); @@ -187,20 +174,6 @@ class Automaton_Big : public Automaton_Base { return out; } -public: - void transition(const StateSet &in, StateSet *next) { - transition_graph(*this, v_by_index, in, next); - } - - const vector initial() { - vector rv(1, init); - if (start_floating != DEAD_STATE && start_floating != start_anchored) { - rv.push_back(initDS); - } - return rv; - } - -private: void reports_i(const StateSet &in, bool eod, flat_set &rv) { StateSet acc = in & (eod ? acceptEod : accept); for (size_t i = acc.find_first(); i != StateSet::npos; @@ -213,15 +186,42 @@ class Automaton_Big : public Automaton_Base { } public: + void transition(const StateSet &in, StateSet *next) { + transition_graph(*this, v_by_index, in, next); + } + + const vector initial() { + vector rv = {init}; + if (start_floating != DEAD_STATE && start_floating != start_anchored) { + rv.push_back(initDS); + } + return rv; + } + void reports(const StateSet &in, flat_set &rv) { reports_i(in, false, rv); } + void reportsEod(const StateSet &in, flat_set &rv) { reports_i(in, true, rv); } -public: - u32 numStates; + static bool canPrune(const flat_set &) { return false; } + + const NGHolder &graph; + const u32 numStates; + const flat_set &unused; + + array alpha; + array unalpha; + u16 alphasize; + + set done_a; + set done_b; + + u16 start_anchored; + u16 start_floating; + vector v_by_index; vector cr_by_index; /* pre alpha'ed */ StateSet init; @@ -235,101 +235,54 @@ class Automaton_Big : public Automaton_Base { StateSet dead; }; -class Automaton_Graph : public Automaton_Base { -public: - typedef bitfield StateSet; - typedef ue2::unordered_map StateMap; +struct Big_Traits { + using StateSet = dynamic_bitset<>; + using StateMap = map; - Automaton_Graph(const NGHolder &graph_in, - const flat_set &unused_in, - som_type som, const vector> &triggers, - bool unordered_som) - : Automaton_Base(graph_in, unused_in) { - populateInit(graph, unused, &init, &initDS, &v_by_index); - populateAccepts(graph, &accept, &acceptEod); + static StateSet init_states(u32 num) { + return StateSet(num); + } - start_anchored = DEAD_STATE + 1; - if (initDS == init) { - start_floating = start_anchored; - } else if (initDS.any()) { - start_floating = start_anchored + 1; - } else { - start_floating = DEAD_STATE; - } + static StateSet copy_states(const 
dynamic_bitset<> &in, UNUSED u32 num) { + assert(in.size() == num); + return in; + } +}; - if (!unordered_som) { - for (const auto &sq : findSquashers(graph, som)) { - NFAVertex v = sq.first; - u32 vert_id = graph[v].index; - squash.set(vert_id); - squash_mask[vert_id] = shrinkStateSet(sq.second); - } - } +class Automaton_Big : public Automaton_Base { +public: + Automaton_Big(const NGHolder &graph_in, + const flat_set &unused_in, som_type som, + const vector> &triggers, bool unordered_som) + : Automaton_Base(graph_in, unused_in, som, triggers, unordered_som) {} +}; - cr_by_index = populateCR(graph, v_by_index, alpha); - if (is_triggered(graph)) { - dynamic_bitset<> temp(NFA_STATE_LIMIT); - markToppableStarts(graph, unused, false, triggers, &temp); - toppable = bitfield(temp); - } +struct Graph_Traits { + using StateSet = bitfield; + using StateMap = ue2::unordered_map; + + static StateSet init_states(UNUSED u32 num) { + assert(num <= NFA_STATE_LIMIT); + return StateSet(); } -private: - // Convert an NFAStateSet (as used by the squash code) into a StateSet. - StateSet shrinkStateSet(const NFAStateSet &in) const { - StateSet out; + static StateSet copy_states(const dynamic_bitset<> &in, u32 num) { + StateSet out = init_states(num); for (size_t i = in.find_first(); i != in.npos && i < out.size(); i = in.find_next(i)) { out.set(i); } return out; } +}; +class Automaton_Graph : public Automaton_Base { public: - void transition(const StateSet &in, StateSet *next) { - transition_graph(*this, v_by_index, in, next); - } - - const vector initial() { - vector rv(1, init); - if (start_floating != DEAD_STATE && start_floating != start_anchored) { - rv.push_back(initDS); - } - return rv; - } - -private: - void reports_i(const StateSet &in, bool eod, flat_set &rv) { - StateSet acc = in & (eod ? acceptEod : accept); - for (size_t i = acc.find_first(); i != StateSet::npos; - i = acc.find_next(i)) { - NFAVertex v = v_by_index[i]; - DEBUG_PRINTF("marking report\n"); - const auto &my_reports = graph[v].reports; - rv.insert(my_reports.begin(), my_reports.end()); - } - } - -public: - void reports(const StateSet &in, flat_set &rv) { - reports_i(in, false, rv); - } - void reportsEod(const StateSet &in, flat_set &rv) { - reports_i(in, true, rv); - } - -public: - vector v_by_index; - vector cr_by_index; /* pre alpha'ed */ - StateSet init; - StateSet initDS; - StateSet squash; /* states which allow us to mask out other states */ - StateSet accept; - StateSet acceptEod; - StateSet toppable; /* states which are allowed to be on when a top arrives, - * triggered dfas only */ - map squash_mask; - StateSet dead; + Automaton_Graph(const NGHolder &graph_in, + const flat_set &unused_in, som_type som, + const vector> &triggers, + bool unordered_som) + : Automaton_Base(graph_in, unused_in, som, triggers, unordered_som) {} }; class Automaton_Haig_Merge { From 75195f5f2e76245ed150340736ff68785a2fbf62 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 20 Apr 2016 17:11:25 +1000 Subject: [PATCH 169/218] ng_mcclellan: move logic into base class Move all of the Automaton logic into Automaton_Base, which is templated on its StateSet/StateMap types. 
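The same traits idiom is used in this patch and the previous one. A
condensed, self-contained sketch of the shape of the refactor, using
standard-library stand-ins and hypothetical names in place of the real
bitfield/dynamic_bitset types:

    #include <bitset>
    #include <cstdint>
    #include <vector>

    // Small graphs get a fixed-size bitset; big graphs pay for a
    // runtime-sized one.
    struct SmallTraits {
        using StateSet = std::bitset<512>;
        static StateSet init_states(uint32_t) { return StateSet(); }
    };

    struct BigTraits {
        using StateSet = std::vector<bool>;
        static StateSet init_states(uint32_t num) {
            return StateSet(num, false);
        }
    };

    // The subset-construction machinery is written once against the
    // traits; the concrete automata become thin shells.
    template <class Traits>
    class AutomatonBase {
    public:
        using StateSet = typename Traits::StateSet;
        explicit AutomatonBase(uint32_t num_states)
            : init(Traits::init_states(num_states)) {}
        StateSet init;
    };

    using AutomatonSmall = AutomatonBase<SmallTraits>;
    using AutomatonBig = AutomatonBase<BigTraits>;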
--- src/nfagraph/ng_mcclellan.cpp | 166 +++++++++++----------------------- 1 file changed, 55 insertions(+), 111 deletions(-) diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 897162872..b1c6ff967 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -341,18 +341,24 @@ void markToppableStarts(const NGHolder &g, const flat_set &unused, namespace { -class Automaton_Big { +template +class Automaton_Base { public: - typedef dynamic_bitset<> StateSet; - typedef map StateMap; + using StateSet = typename Automaton_Traits::StateSet; + using StateMap = typename Automaton_Traits::StateMap; - Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, - const vector> &triggers, bool prunable_in) + Automaton_Base(const ReportManager *rm_in, const NGHolder &graph_in, + const flat_set &unused_in, bool single_trigger, + const vector> &triggers, bool prunable_in) : rm(rm_in), graph(graph_in), numStates(num_vertices(graph)), - unused(unused_in), init(numStates), initDS(numStates), - squash(numStates), accept(numStates), acceptEod(numStates), - toppable(numStates), prunable(prunable_in), dead(numStates) { + unused(unused_in), init(Automaton_Traits::init_states(numStates)), + initDS(Automaton_Traits::init_states(numStates)), + squash(Automaton_Traits::init_states(numStates)), + accept(Automaton_Traits::init_states(numStates)), + acceptEod(Automaton_Traits::init_states(numStates)), + toppable(Automaton_Traits::init_states(numStates)), + dead(Automaton_Traits::init_states(numStates)), + prunable(prunable_in) { populateInit(graph, unused, &init, &initDS, &v_by_index); populateAccepts(graph, unused, &accept, &acceptEod); @@ -376,15 +382,17 @@ class Automaton_Big { cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { + dynamic_bitset<> temp(numStates); markToppableStarts(graph, unused, single_trigger, triggers, - &toppable); + &temp); + toppable = Automaton_Traits::copy_states(temp, numStates); } } private: // Convert an NFAStateSet (as used by the squash code) into a StateSet StateSet shrinkStateSet(const NFAStateSet &in) const { - StateSet out(dead.size()); + StateSet out = Automaton_Traits::init_states(numStates); for (size_t i = in.find_first(); i != in.npos && i < out.size(); i = in.find_next(i)) { out.set(i); @@ -398,7 +406,7 @@ class Automaton_Big { } const vector initial() { - vector rv(1, init); + vector rv = {init}; if (start_floating != DEAD_STATE && start_floating != start_anchored) { rv.push_back(initDS); } @@ -446,9 +454,9 @@ class Automaton_Big { StateSet acceptEod; StateSet toppable; /* states which are allowed to be on when a top arrives, * triggered dfas only */ + StateSet dead; map squash_mask; bool prunable; - StateSet dead; array alpha; array unalpha; u16 alphasize; @@ -457,119 +465,55 @@ class Automaton_Big { u16 start_floating; }; -class Automaton_Graph { -public: - typedef bitfield StateSet; - typedef ue2::unordered_map StateMap; +struct Big_Traits { + using StateSet = dynamic_bitset<>; + using StateMap = map; - Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, - const vector> &triggers, bool prunable_in) - : rm(rm_in), graph(graph_in), unused(unused_in), prunable(prunable_in) { - populateInit(graph, unused, &init, &initDS, &v_by_index); - populateAccepts(graph, unused, &accept, &acceptEod); + static StateSet init_states(u32 num) { + return StateSet(num); + } - start_anchored = 
DEAD_STATE + 1; - if (initDS == init) { - start_floating = start_anchored; - } else if (initDS.any()) { - start_floating = start_anchored + 1; - } else { - start_floating = DEAD_STATE; - } + static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) { + assert(in.size() == num); + return in; + } +}; - calculateAlphabet(graph, alpha, unalpha, &alphasize); - assert(alphasize <= ALPHABET_SIZE); +class Automaton_Big : public Automaton_Base { +public: + Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, + const flat_set &unused_in, bool single_trigger, + const vector> &triggers, bool prunable_in) + : Automaton_Base(rm_in, graph_in, unused_in, single_trigger, triggers, + prunable_in) {} +}; - for (const auto &sq : findSquashers(graph)) { - NFAVertex v = sq.first; - u32 vert_id = graph[v].index; - squash.set(vert_id); - squash_mask[vert_id] = shrinkStateSet(sq.second); - } +struct Graph_Traits { + using StateSet = bitfield; + using StateMap = ue2::unordered_map; - cr_by_index = populateCR(graph, v_by_index, alpha); - if (is_triggered(graph)) { - dynamic_bitset<> temp(NFA_STATE_LIMIT); - markToppableStarts(graph, unused, single_trigger, triggers, &temp); - toppable = bitfield(temp); - } + static StateSet init_states(UNUSED u32 num) { + assert(num <= NFA_STATE_LIMIT); + return StateSet(); } -private: - // Convert an NFAStateSet (as used by the squash code) into a StateSet - StateSet shrinkStateSet(const NFAStateSet &in) const { - StateSet out; + static StateSet copy_states(const dynamic_bitset<> &in, u32 num) { + StateSet out = init_states(num); for (size_t i = in.find_first(); i != in.npos && i < out.size(); i = in.find_next(i)) { out.set(i); } return out; } +}; +class Automaton_Graph : public Automaton_Base { public: - void transition(const StateSet &in, StateSet *next) { - transition_graph(*this, v_by_index, in, next); - } - - const vector initial() { - vector rv(1, init); - if (start_floating != DEAD_STATE && start_floating != start_anchored) { - rv.push_back(initDS); - } - return rv; - } - -private: - void reports_i(const StateSet &in, bool eod, flat_set &rv) { - StateSet acc = in & (eod ? 
acceptEod : accept);
-        for (size_t i = acc.find_first(); i != StateSet::npos;
-             i = acc.find_next(i)) {
-            NFAVertex v = v_by_index[i];
-            DEBUG_PRINTF("marking report\n");
-            const auto &my_reports = graph[v].reports;
-            rv.insert(my_reports.begin(), my_reports.end());
-        }
-    }
-
-public:
-    void reports(const StateSet &in, flat_set<ReportID> &rv) {
-        reports_i(in, false, rv);
-    }
-    void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
-        reports_i(in, true, rv);
-    }
-
-    bool canPrune(const flat_set<ReportID> &test_reports) const {
-        if (!rm || !prunable || !canPruneEdgesFromAccept(*rm, graph)) {
-            return false;
-        }
-        return allExternalReports(*rm, test_reports);
-    }
-
-private:
-    const ReportManager *rm;
-public:
-    const NGHolder &graph;
-    const flat_set<NFAVertex> &unused;
-    vector<NFAVertex> v_by_index;
-    vector<CharReach> cr_by_index; /* pre alpha'ed */
-    StateSet init;
-    StateSet initDS;
-    StateSet squash; /* states which allow us to mask out other states */
-    StateSet accept;
-    StateSet acceptEod;
-    StateSet toppable; /* states which are allowed to be on when a top arrives,
-                        * triggered dfas only */
-    map<u32, StateSet> squash_mask;
-    bool prunable;
-    StateSet dead;
-    array<u16, ALPHABET_SIZE> alpha;
-    array<u16, ALPHABET_SIZE> unalpha;
-    u16 alphasize;
-
-    u16 start_anchored;
-    u16 start_floating;
+    Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in,
+                    const flat_set<NFAVertex> &unused_in, bool single_trigger,
+                    const vector<vector<CharReach>> &triggers, bool prunable_in)
+        : Automaton_Base(rm_in, graph_in, unused_in, single_trigger, triggers,
+                         prunable_in) {}
 };
 
 } // namespace

From fe8ffc55445fd570565f50d2f59f42cca455e7e1 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Thu, 21 Apr 2016 15:39:47 +1000
Subject: [PATCH 170/218] noodle: use SSE palignr

---
 src/hwlm/noodle_engine.c     | 3 ++-
 src/hwlm/noodle_engine_sse.c | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c
index 621f89b0c..e2f80a596 100644
--- a/src/hwlm/noodle_engine.c
+++ b/src/hwlm/noodle_engine.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -37,6 +37,7 @@
 #include "util/compare.h"
 #include "util/masked_move.h"
 #include "util/simd_utils.h"
+#include "util/simd_utils_ssse3.h"
 
 #include <ctype.h>
 #include <string.h>
diff --git a/src/hwlm/noodle_engine_sse.c b/src/hwlm/noodle_engine_sse.c
index 956fd82e7..b36732462 100644
--- a/src/hwlm/noodle_engine_sse.c
+++ b/src/hwlm/noodle_engine_sse.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -190,8 +190,8 @@ hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
         m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
         m128 z1 = eq128(mask1, v);
         m128 z2 = eq128(mask2, v);
-        u32 z = movemask128(and128(or128(lastz1, shiftLeft8Bits(z1)), z2));
-        lastz1 = _mm_srli_si128(z1, 15);
+        u32 z = movemask128(and128(palignr(z1, lastz1, 15), z2));
+        lastz1 = z1;
 
         // On large packet buffers, this prefetch appears to get us about 2%.
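        /*
         * An illustrative note on the palignr change above (a sketch, not a
         * further edit to the hunk): z1 flags bytes equal to the first key
         * byte and z2 the second, so a pair match at byte i needs z1 set at
         * i - 1. That means z1 must be shifted along by one byte, with the
         * top byte of the previous iteration's z1 carried in across the
         * 16-byte boundary. palignr does the shift and the carry in one
         * SSSE3 instruction:
         *
         *   palignr(z1, lastz1, 15)
         *       == or128(_mm_srli_si128(lastz1, 15), shiftLeft8Bits(z1))
         *
         * which is why lastz1 can now hold the whole previous z1 vector
         * rather than its pre-shifted top byte.
         */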
__builtin_prefetch(d + 128); From 54c0fb7e0e91ecd8b7db9ea5267b363874436e8a Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 21 Apr 2016 15:50:01 +1000 Subject: [PATCH 171/218] noodle: remove redundant bitmask --- src/hwlm/noodle_engine_avx2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hwlm/noodle_engine_avx2.c b/src/hwlm/noodle_engine_avx2.c index 093501cb9..14d0eab54 100644 --- a/src/hwlm/noodle_engine_avx2.c +++ b/src/hwlm/noodle_engine_avx2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -221,7 +221,7 @@ hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key, u32 z0 = movemask256(eq256(mask1, v)); u32 z1 = movemask256(eq256(mask2, v)); u32 z = (lastz0 | (z0 << 1)) & z1; - lastz0 = (z0 & 0x80000000) >> 31; + lastz0 = z0 >> 31; // On large packet buffers, this prefetch appears to get us about 2%. __builtin_prefetch(d + 128); From da89f5ef6bc33782b88a5f2114846dfd90903272 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 21 Apr 2016 13:57:57 +1000 Subject: [PATCH 172/218] ue2string: bring caseless cmp inline --- src/util/ue2string.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index 4b1661967..6fdc57ba5 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -173,7 +173,16 @@ size_t maxStringSelfOverlap(const string &a, bool nocase) { } u32 cmp(const char *a, const char *b, size_t len, bool nocase) { - return cmpForward((const u8 *)a, (const u8 *)b, len, nocase); + if (!nocase) { + return memcmp(a, b, len); + } + + for (const auto *a_end = a + len; a < a_end; a++, b++) { + if (mytoupper(*a) != mytoupper(*b)) { + return 1; + } + } + return 0; } case_iter::case_iter(const ue2_literal &ss) : s(ss.get_string()), From 07bb14a84a3d282ac98ff33578895fd7ed147100 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 21 Apr 2016 14:17:45 +1000 Subject: [PATCH 173/218] hwlm_literal: coerce nocase lits to upper-case --- src/hwlm/hwlm_literal.cpp | 19 ++++++++++++------- src/hwlm/hwlm_literal.h | 11 +++++------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/hwlm/hwlm_literal.cpp b/src/hwlm/hwlm_literal.cpp index 4d8b43361..9e365a0cd 100644 --- a/src/hwlm/hwlm_literal.cpp +++ b/src/hwlm/hwlm_literal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,13 +34,11 @@ #include "util/compare.h" // for ourisalpha #include "util/ue2string.h" // for escapeString +#include #include #include -#include - using namespace std; -using namespace boost::algorithm; namespace ue2 { @@ -91,10 +89,17 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in, assert(msk.size() <= HWLM_MASKLEN); assert(msk.size() == cmp.size()); - DEBUG_PRINTF("literal '%s', msk=%s, cmp=%s\n", - escapeString(s).c_str(), dumpMask(msk).c_str(), + // If we've been handled a nocase literal, 
all letter characters must be + // upper-case. + if (nocase) { + upperString(s); + } + + DEBUG_PRINTF("literal '%s'%s, msk=%s, cmp=%s\n", escapeString(s).c_str(), + nocase ? " (nocase)" : "", dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + // Mask and compare vectors MUST be the same size. assert(msk.size() == cmp.size()); @@ -102,7 +107,7 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in, assert(maskIsConsistent(s, nocase, msk, cmp)); // In the name of good hygiene, zap msk/cmp if msk is all zeroes. - if (all_of_equal(msk.begin(), msk.end(), 0)) { + if (all_of(begin(msk), end(msk), [](u8 val) { return val == 0; })) { msk.clear(); cmp.clear(); } diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index ca9695bc5..7e63a6f32 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -95,11 +95,6 @@ struct hwlmLiteral { */ std::vector cmp; - /** \brief Simple constructor: no group information, no msk/cmp. */ - hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in) - : s(s_in), id(id_in), nocase(nocase_in), noruns(false), - groups(HWLM_ALL_GROUPS), msk(0), cmp(0) {} - /** \brief Complete constructor, takes group information and msk/cmp. * * This constructor takes a msk/cmp pair. Both must be vectors of length <= @@ -107,6 +102,10 @@ struct hwlmLiteral { hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in, u32 id_in, hwlm_group_t groups_in, const std::vector &msk_in, const std::vector &cmp_in); + + /** \brief Simple constructor: no group information, no msk/cmp. */ + hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in) + : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {} }; /** From 8eec61445f4a0bab48fa329908f9caf61dff4cef Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 21 Apr 2016 15:07:21 +1000 Subject: [PATCH 174/218] unit: fix up noodle unit tests --- unit/internal/noodle.cpp | 82 ++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/unit/internal/noodle.cpp b/unit/internal/noodle.cpp index 56fcc8f02..d1d9a1b47 100644 --- a/unit/internal/noodle.cpp +++ b/unit/internal/noodle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "hwlm/noodle_engine.h" #include "hwlm/hwlm.h" #include "util/alloc.h" +#include "util/ue2string.h" #include #include @@ -64,9 +65,15 @@ hwlmcb_rv_t hlmSimpleCallback(size_t from, size_t to, u32 id, void *context) { } static -void noodleMatch(const u8 *data, size_t data_len, const u8 *lit, size_t lit_len, - char nocase, HWLMCallback cb, void *ctxt) { - auto n = noodBuildTable(lit, lit_len, nocase, 0); +void noodleMatch(const u8 *data, size_t data_len, const char *lit, + size_t lit_len, char nocase, HWLMCallback cb, void *ctxt) { + // Coerce to upper-case if nocase. 
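+    // noodBuildTable is exercised directly here (bypassing hwlmLiteral), so
+    // this helper must mirror the upper-casing that the hwlmLiteral
+    // constructor now performs for nocase literals.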
+ std::string s(lit, lit_len); + if (nocase) { + upperString(s); + } + + auto n = noodBuildTable((const u8 *)s.c_str(), s.length(), nocase, 0); ASSERT_TRUE(n != nullptr); hwlm_error_t rv; @@ -82,8 +89,7 @@ TEST(Noodle, nood1) { memset(data, 'a', data_len); - noodleMatch(data, data_len, (const u8 *)"a", 1, 0, hlmSimpleCallback, - &ctxt); + noodleMatch(data, data_len, "a", 1, 0, hlmSimpleCallback, &ctxt); ASSERT_EQ(1024U, ctxt.size()); for (i = 0; i < 1024; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -91,13 +97,11 @@ TEST(Noodle, nood1) { } ctxt.clear(); - noodleMatch(data, data_len, (const u8 *)"A", 1, 0, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "A", 1, 0, hlmSimpleCallback, &ctxt); ASSERT_EQ(0U, ctxt.size()); ctxt.clear(); - noodleMatch(data, data_len, (const u8 *)"A", 1, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "A", 1, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1024U, ctxt.size()); for (i = 0; i < 1024; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -106,8 +110,8 @@ TEST(Noodle, nood1) { for (j = 0; j < 16; j++) { ctxt.clear(); - noodleMatch(data + j, data_len - j, (const u8 *)"A", 1, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data + j, data_len - j, "A", 1, 1, hlmSimpleCallback, + &ctxt); ASSERT_EQ(1024 - j, ctxt.size()); for (i = 0; i < 1024 - j; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -115,8 +119,7 @@ TEST(Noodle, nood1) { } ctxt.clear(); - noodleMatch(data, data_len - j, (const u8 *)"A", 1, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len - j, "A", 1, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1024 - j, ctxt.size()); for (i = 0; i < 1024 - j; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -133,8 +136,7 @@ TEST(Noodle, nood2) { memset(data, 'a', data_len); - noodleMatch(data, data_len, (const u8 *)"aa", 2, 0, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aa", 2, 0, hlmSimpleCallback, &ctxt); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -142,18 +144,15 @@ TEST(Noodle, nood2) { } ctxt.clear(); - noodleMatch(data, data_len, (const u8 *)"aA", 2, 0, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aA", 2, 0, hlmSimpleCallback, &ctxt); ASSERT_EQ(0U, ctxt.size()); ctxt.clear(); - noodleMatch(data, data_len, (const u8 *)"AA", 2, 0, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "AA", 2, 0, hlmSimpleCallback, &ctxt); ASSERT_EQ(0U, ctxt.size()); ctxt.clear(); - noodleMatch(data, data_len, (const u8 *)"aa", 2, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aa", 2, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -161,8 +160,7 @@ TEST(Noodle, nood2) { } ctxt.clear(); - noodleMatch(data, data_len, (const u8 *)"Aa", 2, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "Aa", 2, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -170,8 +168,7 @@ TEST(Noodle, nood2) { } ctxt.clear(); - noodleMatch(data, data_len, (const u8 *)"AA", 2, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "AA", 2, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -180,8 +177,8 @@ TEST(Noodle, nood2) { for (j = 0; j < 16; j++) { ctxt.clear(); - noodleMatch(data + j, data_len - j, (const u8 *)"Aa", 2, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data + j, data_len - j, "Aa", 2, 1, hlmSimpleCallback, + &ctxt); ASSERT_EQ(1023 - j, ctxt.size()); for (i = 0; i < 1023 - j; i++) { 
ASSERT_EQ(i, ctxt[i].from); @@ -189,8 +186,7 @@ TEST(Noodle, nood2) { } ctxt.clear(); - noodleMatch(data, data_len - j, (const u8 *)"aA", 2, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len - j, "aA", 2, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1023 - j, ctxt.size()); for (i = 0; i < 1023 - j; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -207,8 +203,7 @@ TEST(Noodle, noodLong) { memset(data, 'a', data_len); - noodleMatch(data, data_len, (const u8 *)"aaaa", 4, 0, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aaaa", 4, 0, hlmSimpleCallback, &ctxt); ASSERT_EQ(1021U, ctxt.size()); for (i = 0; i < 1021; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -216,13 +211,11 @@ TEST(Noodle, noodLong) { } ctxt.clear(); - noodleMatch(data, data_len, (const u8 *)"aaAA", 4, 0, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aaAA", 4, 0, hlmSimpleCallback, &ctxt); ASSERT_EQ(0U, ctxt.size()); ctxt.clear(); - noodleMatch(data, data_len, (const u8 *)"aaAA", 4, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aaAA", 4, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1021U, ctxt.size()); for (i = 0; i < 1021; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -231,8 +224,8 @@ TEST(Noodle, noodLong) { for (j = 0; j < 16; j++) { ctxt.clear(); - noodleMatch(data + j, data_len -j, (const u8 *)"AAaa", 4, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data + j, data_len - j, "AAaa", 4, 1, hlmSimpleCallback, + &ctxt); ASSERT_EQ(1021 - j, ctxt.size()); for (i = 0; i < 1021 - j; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -240,8 +233,8 @@ TEST(Noodle, noodLong) { } ctxt.clear(); - noodleMatch(data + j, data_len -j, (const u8 *)"aaaA", 4, 1, - hlmSimpleCallback, &ctxt); + noodleMatch(data + j, data_len - j, "aaaA", 4, 1, hlmSimpleCallback, + &ctxt); ASSERT_EQ(1021 - j, ctxt.size()); for (i = 0; i < 1021 - j; i++) { ASSERT_EQ(i, ctxt[i].from); @@ -260,8 +253,7 @@ TEST(Noodle, noodCutoverSingle) { for (u32 align = 0; align < 16; align++) { for (u32 len = 0; len < max_data_len; len++) { ctxt.clear(); - noodleMatch(data + align, len, (const u8 *)"a", 1, 0, - hlmSimpleCallback, &ctxt); + noodleMatch(data + align, len, "a", 1, 0, hlmSimpleCallback, &ctxt); EXPECT_EQ(len, ctxt.size()); for (u32 i = 0; i < ctxt.size(); i++) { ASSERT_EQ(i, ctxt[i].from); @@ -281,9 +273,9 @@ TEST(Noodle, noodCutoverDouble) { for (u32 align = 0; align < 16; align++) { for (u32 len = 0; len < max_data_len; len++) { ctxt.clear(); - noodleMatch(data + align, len, (const u8 *)"aa", 2, 0, - hlmSimpleCallback, &ctxt); - EXPECT_EQ(len ? len - 1 : 0U , ctxt.size()); + noodleMatch(data + align, len, "aa", 2, 0, hlmSimpleCallback, + &ctxt); + EXPECT_EQ(len ? 
len - 1 : 0U, ctxt.size()); for (u32 i = 0; i < ctxt.size(); i++) { ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 1, ctxt[i].to); From 31b1114f76b7dd80d6d277e9d79b8f7f047dd500 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 21 Apr 2016 13:39:16 +1000 Subject: [PATCH 175/218] cmpForward: assume (and assert) p2 is upper-case --- src/util/compare.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/util/compare.h b/src/util/compare.h index 11c01f08e..eaa717a4c 100644 --- a/src/util/compare.h +++ b/src/util/compare.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -97,9 +97,10 @@ u64a theirtoupper64(const u64a x) { static really_inline int cmpNocaseNaive(const u8 *p1, const u8 *p2, size_t len) { - const u8 *pEnd = (const u8 *)p1 + len; + const u8 *pEnd = p1 + len; for (; p1 < pEnd; p1++, p2++) { - if (mytolower(*p1) != mytolower(*p2)) { + assert(!ourisalpha(*p2) || myisupper(*p2)); // Already upper-case. + if ((u8)mytoupper(*p1) != *p2) { return 1; } } @@ -108,7 +109,7 @@ int cmpNocaseNaive(const u8 *p1, const u8 *p2, size_t len) { static really_inline int cmpCaseNaive(const u8 *p1, const u8 *p2, size_t len) { - const u8 *pEnd = (const u8 *)p1 + len; + const u8 *pEnd = p1 + len; for (; p1 < pEnd; p1++, p2++) { if (*p1 != *p2) { return 1; @@ -129,6 +130,11 @@ int cmpCaseNaive(const u8 *p1, const u8 *p2, size_t len) { #define CMP_SIZE sizeof(CMP_T) +/** + * \brief Compare two strings, optionally caselessly. + * + * Note: If nocase is true, p2 is assumed to be already upper-case. + */ #if defined(ARCH_IA32) static UNUSED never_inline #else @@ -145,11 +151,13 @@ int cmpForward(const u8 *p1, const u8 *p2, size_t len, char nocase) { if (nocase) { // Case-insensitive version. for (; p1 < p1_end; p1 += CMP_SIZE, p2 += CMP_SIZE) { - if (TOUPPER(ULOAD(p1)) != TOUPPER(ULOAD(p2))) { + assert(ULOAD(p2) == TOUPPER(ULOAD(p2))); // Already upper-case. + if (TOUPPER(ULOAD(p1)) != ULOAD(p2)) { return 1; } } - if (TOUPPER(ULOAD(p1_end)) != TOUPPER(ULOAD(p2_end))) { + assert(ULOAD(p2_end) == TOUPPER(ULOAD(p2_end))); // Already upper-case. + if (TOUPPER(ULOAD(p1_end)) != ULOAD(p2_end)) { return 1; } } else { // Case-sensitive version. 
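The three caseless-compare patches above converge on a single invariant: nocase
literals are upper-cased once at compile time, so the scanners only need to
case-fold the subject side of each comparison. Below is a minimal,
self-contained sketch of that idea in plain C; the helper names upcase64 and
cmp_nocase are illustrative only, and the real cmpForward uses the
TOUPPER/ULOAD word macros with overlapping unaligned loads rather than a
scalar tail. The word-wide fold is one SWAR formulation in the same spirit as
the theirtoupper64 helper shown in compare.h.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Clear the 0x20 case bit for every byte in 'a'..'z', eight bytes at
     * a time (SWAR range test). */
    static uint64_t upcase64(uint64_t x) {
        const uint64_t ones = 0x0101010101010101ULL;
        uint64_t heptets = x & (0x7f * ones);          /* low 7 bits/byte */
        uint64_t ge_a = heptets + (0x80 - 'a') * ones; /* MSB: byte >= 'a' */
        uint64_t gt_z = heptets + (0x7f - 'z') * ones; /* MSB: byte > 'z' */
        uint64_t ascii = ~x & (0x80 * ones);           /* MSB: byte < 0x80 */
        uint64_t lower = ge_a & ~gt_z & ascii;         /* MSB marks 'a'..'z' */
        return x ^ (lower >> 2);                       /* 0x80 >> 2 == 0x20 */
    }

    /* Caseless compare against a pattern that is already upper-case, which
     * is what the hwlmLiteral constructor now guarantees. Returns 0 on
     * match, non-zero otherwise. */
    static int cmp_nocase(const uint8_t *subj, const uint8_t *pat_upper,
                          size_t len) {
        uint64_t s, p;
        for (; len >= sizeof(s); subj += 8, pat_upper += 8, len -= 8) {
            memcpy(&s, subj, sizeof(s));
            memcpy(&p, pat_upper, sizeof(p));
            if (upcase64(s) != p) {
                return 1;
            }
        }
        for (; len; len--, subj++, pat_upper++) {
            uint8_t c = *subj;
            if (c >= 'a' && c <= 'z') {
                c ^= 0x20; /* fold to upper */
            }
            if (c != *pat_upper) {
                return 1;
            }
        }
        return 0;
    }

Only the subject word is folded per iteration; the pattern word is used as
loaded. That asymmetry is exactly what the new assertions in cmpForward and
cmpNocaseNaive enforce.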
From d7774f3d69022803005c884307b23fdafab3b75b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 22 Apr 2016 16:09:39 +1000 Subject: [PATCH 176/218] noodle_build: update interface to use hwlmLiteral --- src/hwlm/hwlm_build.cpp | 5 ++--- src/hwlm/noodle_build.cpp | 46 ++++++++++++++++++++++++--------------- src/hwlm/noodle_build.h | 5 +++-- unit/internal/noodle.cpp | 13 +++++------ 4 files changed, 38 insertions(+), 31 deletions(-) diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 0de120bad..f86a70d26 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -526,8 +526,7 @@ aligned_unique_ptr hwlmBuild(const vector &lits, DEBUG_PRINTF("build noodle table\n"); engType = HWLM_ENGINE_NOOD; const hwlmLiteral &lit = lits.front(); - auto noodle = noodBuildTable((const u8 *)lit.s.c_str(), lit.s.length(), - lit.nocase, lit.id); + auto noodle = noodBuildTable(lit); if (noodle) { engSize = noodSize(noodle.get()); } diff --git a/src/hwlm/noodle_build.cpp b/src/hwlm/noodle_build.cpp index 9e087211a..d2b4e3f20 100644 --- a/src/hwlm/noodle_build.cpp +++ b/src/hwlm/noodle_build.cpp @@ -26,28 +26,35 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Noodle literal matcher: build code. */ -#include // for memcpy #include "noodle_build.h" + +#include "hwlm_literal.h" #include "noodle_internal.h" -#include "ue2common.h" #include "util/alloc.h" #include "util/compare.h" #include "util/verify_types.h" +#include "ue2common.h" + +#include // for memcpy namespace ue2 { static -size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) { +size_t findNoodFragOffset(const hwlmLiteral &lit) { + const auto &s = lit.s; + const size_t len = lit.s.length(); + size_t offset = 0; for (size_t i = 0; i + 1 < len; i++) { int diff = 0; - const char c = lit[i]; - const char d = lit[i + 1]; - if (nocase && ourisalpha(c)) { + const char c = s[i]; + const char d = s[i + 1]; + if (lit.nocase && ourisalpha(c)) { diff = (mytoupper(c) != mytoupper(d)); } else { diff = (c != d); @@ -60,21 +67,24 @@ size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) { return offset; } -/** \brief Construct a Noodle matcher for the given literal. */ -aligned_unique_ptr noodBuildTable(const u8 *lit, size_t len, - bool nocase, u32 id) { - size_t noodle_len = sizeof(noodTable) + len; - aligned_unique_ptr n = - aligned_zmalloc_unique(noodle_len); +aligned_unique_ptr noodBuildTable(const hwlmLiteral &lit) { + if (!lit.msk.empty()) { + DEBUG_PRINTF("noodle can't handle supplementary masks\n"); + return nullptr; + } + + const auto &s = lit.s; + size_t noodle_len = sizeof(noodTable) + s.length(); + auto n = aligned_zmalloc_unique(noodle_len); assert(n); - size_t key_offset = findNoodFragOffset(lit, len, nocase); + size_t key_offset = findNoodFragOffset(lit); - n->id = id; - n->len = verify_u32(len); + n->id = lit.id; + n->len = verify_u32(s.length()); n->key_offset = verify_u32(key_offset); - n->nocase = nocase ? 1 : 0; - memcpy(n->str, lit, len); + n->nocase = lit.nocase ? 
1 : 0; + memcpy(n->str, s.c_str(), s.length()); return n; } diff --git a/src/hwlm/noodle_build.h b/src/hwlm/noodle_build.h index 3e8f5cb52..1a41695f7 100644 --- a/src/hwlm/noodle_build.h +++ b/src/hwlm/noodle_build.h @@ -40,9 +40,10 @@ struct noodTable; namespace ue2 { +struct hwlmLiteral; + /** \brief Construct a Noodle matcher for the given literal. */ -ue2::aligned_unique_ptr noodBuildTable(const u8 *lit, size_t len, - bool nocase, u32 id); +ue2::aligned_unique_ptr noodBuildTable(const hwlmLiteral &lit); size_t noodSize(const noodTable *n); diff --git a/unit/internal/noodle.cpp b/unit/internal/noodle.cpp index d1d9a1b47..5df662369 100644 --- a/unit/internal/noodle.cpp +++ b/unit/internal/noodle.cpp @@ -32,6 +32,7 @@ #include "hwlm/noodle_build.h" #include "hwlm/noodle_engine.h" #include "hwlm/hwlm.h" +#include "hwlm/hwlm_literal.h" #include "util/alloc.h" #include "util/ue2string.h" @@ -65,15 +66,11 @@ hwlmcb_rv_t hlmSimpleCallback(size_t from, size_t to, u32 id, void *context) { } static -void noodleMatch(const u8 *data, size_t data_len, const char *lit, +void noodleMatch(const u8 *data, size_t data_len, const char *lit_str, size_t lit_len, char nocase, HWLMCallback cb, void *ctxt) { - // Coerce to upper-case if nocase. - std::string s(lit, lit_len); - if (nocase) { - upperString(s); - } - - auto n = noodBuildTable((const u8 *)s.c_str(), s.length(), nocase, 0); + u32 id = 1000; + hwlmLiteral lit(std::string(lit_str, lit_len), nocase, id); + auto n = noodBuildTable(lit); ASSERT_TRUE(n != nullptr); hwlm_error_t rv; From 9fa11b48b0b1302a81995bec8489093d83ffd637 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 14 Apr 2016 12:30:46 +1000 Subject: [PATCH 177/218] mcclellan compile: more efficient discovery of accel --- src/nfa/mcclellancompile_accel.cpp | 69 +++++++++++++++++------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp index 432c035dd..c5325fcc6 100644 --- a/src/nfa/mcclellancompile_accel.cpp +++ b/src/nfa/mcclellancompile_accel.cpp @@ -272,8 +272,6 @@ map populateAccelerationInfo(const raw_dfa &rdfa, DEBUG_PRINTF("sds %hu\n", sds_proxy); for (size_t i = 0; i < rdfa.states.size(); i++) { - AccelScheme ei = strat.find_escape_strings(i); - if (i == DEAD_STATE) { continue; } @@ -281,8 +279,7 @@ map populateAccelerationInfo(const raw_dfa &rdfa, /* Note on report acceleration states: While we can't accelerate while we * are spamming out callbacks, the QR code paths don't raise reports * during scanning so they can accelerate report states. 
*/ - if (generates_callbacks(rdfa.kind) - && !rdfa.states[i].reports.empty()) { + if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) { continue; } @@ -290,6 +287,7 @@ map populateAccelerationInfo(const raw_dfa &rdfa, : ACCEL_DFA_MAX_STOP_CHAR; DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); + AccelScheme ei = strat.find_escape_strings(i); if (ei.cr.count() > single_limit) { DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, ei.cr.count()); @@ -335,6 +333,7 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx const dstate &raw = rdfa.states[this_idx]; const vector rev_map = reverse_alpha_remapping(rdfa); bool outs2_broken = false; + map succs; for (u32 i = 0; i < rev_map.size(); i++) { if (raw.next[i] == this_idx) { @@ -344,39 +343,49 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx const CharReach &cr_i = rev_map.at(i); rv.cr |= cr_i; + dstate_id_t next_id = raw.next[i]; - DEBUG_PRINTF("next is %hu\n", raw.next[i]); - const dstate &raw_next = rdfa.states[raw.next[i]]; + DEBUG_PRINTF("next is %hu\n", next_id); + const dstate &raw_next = rdfa.states[next_id]; + + if (outs2_broken) { + continue; + } if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { DEBUG_PRINTF("leads to report\n"); outs2_broken = true; /* cannot accelerate over reports */ - } - - if (outs2_broken) { continue; } + succs[next_id] |= cr_i; + } - CharReach cr_all_j; - for (u32 j = 0; j < rev_map.size(); j++) { - if (raw_next.next[j] == raw.next[j]) { - continue; - } + if (!outs2_broken) { + for (const auto &e : succs) { + const CharReach &cr_i = e.second; + const dstate &raw_next = rdfa.states[e.first]; - DEBUG_PRINTF("adding sym %u sym %u -> %hu to 2 \n", i, j, - raw_next.next[j]); - cr_all_j |= rev_map.at(j); - } + CharReach cr_all_j; + for (u32 j = 0; j < rev_map.size(); j++) { + if (raw_next.next[j] == raw.next[j]) { + continue; + } - if (cr_i.count() * cr_all_j.count() > 8) { - DEBUG_PRINTF("adding sym %u to double_cr\n", i); - rv.double_cr |= cr_i; - } else { - for (auto ii = cr_i.find_first(); ii != CharReach::npos; - ii = cr_i.find_next(ii)) { - for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; - jj = cr_all_j.find_next(jj)) { - rv.double_byte.emplace((u8)ii, (u8)jj); + DEBUG_PRINTF("state %hu: adding sym %u -> %hu to 2 \n", e.first, + j, raw_next.next[j]); + cr_all_j |= rev_map.at(j); + } + + if (cr_i.count() * cr_all_j.count() > 8) { + DEBUG_PRINTF("adding %zu to double_cr\n", cr_i.count()); + rv.double_cr |= cr_i; + } else { + for (auto ii = cr_i.find_first(); ii != CharReach::npos; + ii = cr_i.find_next(ii)) { + for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; + jj = cr_all_j.find_next(jj)) { + rv.double_byte.emplace((u8)ii, (u8)jj); + } } } } @@ -385,10 +394,10 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx DEBUG_PRINTF("outs2 too big\n"); outs2_broken = true; } - } - if (outs2_broken) { - rv.double_byte.clear(); + if (outs2_broken) { + rv.double_byte.clear(); + } } DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); From 353f0b4da22bf7cb7ab8f44730fc16c55843cd33 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 27 Apr 2016 15:50:16 +1000 Subject: [PATCH 178/218] fdr: move fdrSize into namespace ue2 (build only) --- src/fdr/fdr.h | 5 +---- src/fdr/fdr_compile.cpp | 7 +++---- src/fdr/fdr_compile.h | 5 ++++- src/fdr/fdr_dump.cpp | 6 +++--- src/hwlm/hwlm_build.cpp | 1 - 5 files changed, 11 
insertions(+), 13 deletions(-) diff --git a/src/fdr/fdr.h b/src/fdr/fdr.h index c70f94880..e0aa594fe 100644 --- a/src/fdr/fdr.h +++ b/src/fdr/fdr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,9 +43,6 @@ extern "C" { struct FDR; -/** \brief Returns size in bytes of the given FDR engine. */ -size_t fdrSize(const struct FDR *fdr); - /** \brief Returns non-zero if the contents of the stream state indicate that * there is active FDR history beyond the regularly used history. */ u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state); diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index ccf176267..2ffeb9847 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -571,10 +571,9 @@ fdrBuildTableHinted(const vector &lits, bool make_small, u32 hint, #endif -} // namespace ue2 - -// FIXME: should be compile-time only size_t fdrSize(const FDR *fdr) { assert(fdr); return fdr->size; } + +} // namespace ue2 diff --git a/src/fdr/fdr_compile.h b/src/fdr/fdr_compile.h index 734185686..c12e00714 100644 --- a/src/fdr/fdr_compile.h +++ b/src/fdr/fdr_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,6 +61,9 @@ fdrBuildTableHinted(const std::vector &lits, bool make_small, #endif +/** \brief Returns size in bytes of the given FDR engine. 
*/ +size_t fdrSize(const struct FDR *fdr); + } // namespace ue2 #endif diff --git a/src/fdr/fdr_dump.cpp b/src/fdr/fdr_dump.cpp index 158170c26..a141f3882 100644 --- a/src/fdr/fdr_dump.cpp +++ b/src/fdr/fdr_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,11 +28,11 @@ #include "config.h" -#include "fdr.h" -#include "fdr_internal.h" +#include "fdr_compile.h" #include "fdr_compile_internal.h" #include "fdr_dump.h" #include "fdr_engine_description.h" +#include "fdr_internal.h" #include "teddy_engine_description.h" #include "ue2common.h" diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index f86a70d26..b39780171 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -37,7 +37,6 @@ #include "noodle_build.h" #include "ue2common.h" #include "fdr/fdr_compile.h" -#include "fdr/fdr.h" #include "nfa/shufticompile.h" #include "util/alloc.h" #include "util/bitutils.h" From f7cdfc5e54773add46e0d93681c022af98b84407 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 27 Apr 2016 16:37:25 +1000 Subject: [PATCH 179/218] fdr: don't include fdr.h if not necessary --- src/fdr/fdr_compile.cpp | 2 +- src/fdr/fdr_confirm_compile.cpp | 3 +-- src/fdr/fdr_streaming_compile.cpp | 3 +-- src/fdr/flood_compile.cpp | 3 +-- src/fdr/teddy_compile.cpp | 2 +- src/fdr/teddy_engine_description.cpp | 3 +-- 6 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 2ffeb9847..ccf177f0d 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -29,7 +29,7 @@ /** \file * \brief FDR literal matcher: build API. */ -#include "fdr.h" + #include "fdr_internal.h" #include "fdr_compile.h" #include "fdr_confirm.h" diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index db236f316..08946a5fe 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "fdr.h" #include "fdr_internal.h" #include "fdr_compile_internal.h" #include "fdr_confirm.h" diff --git a/src/fdr/fdr_streaming_compile.cpp b/src/fdr/fdr_streaming_compile.cpp index 8f9d47280..34536eecb 100644 --- a/src/fdr/fdr_streaming_compile.cpp +++ b/src/fdr/fdr_streaming_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,6 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "fdr.h" #include "fdr_internal.h" #include "fdr_streaming_internal.h" #include "fdr_compile_internal.h" diff --git a/src/fdr/flood_compile.cpp b/src/fdr/flood_compile.cpp index 321a32125..2c1317884 100644 --- a/src/fdr/flood_compile.cpp +++ b/src/fdr/flood_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "fdr.h" #include "fdr_internal.h" #include "fdr_confirm.h" #include "fdr_compile_internal.h" diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index cc8c33749..c1e46d85d 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/fdr/teddy_engine_description.cpp b/src/fdr/teddy_engine_description.cpp index 87edcbe51..ead448a8f 100644 --- a/src/fdr/teddy_engine_description.cpp +++ b/src/fdr/teddy_engine_description.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "fdr.h" #include "fdr_internal.h" #include "fdr_compile_internal.h" #include "fdr_confirm.h" From e86688e313244c755232ccc1ea93a7155eb2c92e Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 18 Apr 2016 11:23:56 +1000 Subject: [PATCH 180/218] add m128 byte shift functions variable_byte_shift_m128 taken from pug-interpreter branch --- CMakeLists.txt | 1 + src/util/simd_utils.h | 3 +- src/util/simd_utils_ssse3.c | 40 +++++++++++++++++++++++++ src/util/simd_utils_ssse3.h | 11 ++++++- unit/internal/simd_utils.cpp | 58 ++++++++++++++++++++++++++++++++++-- 5 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 src/util/simd_utils_ssse3.c diff --git a/CMakeLists.txt b/CMakeLists.txt index a4973c026..0e118dd07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -521,6 +521,7 @@ set (hs_exec_SRCS src/util/shuffle_ssse3.h src/util/simd_utils.h src/util/simd_utils_ssse3.h + src/util/simd_utils_ssse3.c src/util/state_compress.h src/util/state_compress.c src/util/unaligned.h diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index e115aa7ae..99ad7ce59 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -199,6 +199,7 @@ static really_inline m128 shiftLeft8Bits(m128 a) { } #define byteShiftRight128(a, count_immed) _mm_srli_si128(a, count_immed) +#define byteShiftLeft128(a, count_immed) _mm_slli_si128(a, count_immed) #if !defined(__AVX2__) // TODO: this entire file needs restructuring - this carveout is awful diff --git a/src/util/simd_utils_ssse3.c b/src/util/simd_utils_ssse3.c new file mode 100644 index 000000000..50cbe007a --- /dev/null +++ b/src/util/simd_utils_ssse3.c @@ -0,0 +1,40 @@ +/* + * 
Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "simd_utils_ssse3.h" + +const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = { + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, + + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, +}; diff --git a/src/util/simd_utils_ssse3.h b/src/util/simd_utils_ssse3.h index 23c77d225..268bf4229 100644 --- a/src/util/simd_utils_ssse3.h +++ b/src/util/simd_utils_ssse3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,6 +77,15 @@ m128 pshufb(m128 a, m128 b) { return result; } +extern const char vbs_mask_data[]; + +static really_inline +m128 variable_byte_shift_m128(m128 in, s32 amount) { + assert(amount >= -16 && amount <= 16); + m128 shift_mask = loadu128(vbs_mask_data + 16 - amount); + return pshufb(in, shift_mask); +} + #if defined(__AVX2__) static really_inline diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index c0b2b2a00..de0f1eea7 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ #include "util/alloc.h" #include "util/make_unique.h" #include "util/simd_utils.h" - +#include "util/simd_utils_ssse3.h" using namespace std; using namespace ue2; @@ -640,4 +640,58 @@ TEST(SimdUtilsTest, set2x128) { } #endif +TEST(SimdUtilsTest, variableByteShift128) { + char base[] = "0123456789ABCDEF"; + m128 in = loadu128(base); + + 
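+    // variable_byte_shift_m128 should agree with the immediate-count byte
+    // shifts: a positive count shifts left, a negative count shifts right,
+    // and a count of +/-16 clears the vector entirely.
+    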
EXPECT_TRUE(!diff128(byteShiftRight128(in, 0), + variable_byte_shift_m128(in, 0))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 1), + variable_byte_shift_m128(in, -1))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 2), + variable_byte_shift_m128(in, -2))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 3), + variable_byte_shift_m128(in, -3))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 4), + variable_byte_shift_m128(in, -4))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 5), + variable_byte_shift_m128(in, -5))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 6), + variable_byte_shift_m128(in, -6))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 7), + variable_byte_shift_m128(in, -7))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 8), + variable_byte_shift_m128(in, -8))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 9), + variable_byte_shift_m128(in, -9))); + EXPECT_TRUE(!diff128(byteShiftRight128(in, 10), + variable_byte_shift_m128(in, -10))); + + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 0), + variable_byte_shift_m128(in, 0))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 1), + variable_byte_shift_m128(in, 1))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 2), + variable_byte_shift_m128(in, 2))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 3), + variable_byte_shift_m128(in, 3))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 4), + variable_byte_shift_m128(in, 4))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 5), + variable_byte_shift_m128(in, 5))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 6), + variable_byte_shift_m128(in, 6))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 7), + variable_byte_shift_m128(in, 7))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 8), + variable_byte_shift_m128(in, 8))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 9), + variable_byte_shift_m128(in, 9))); + EXPECT_TRUE(!diff128(byteShiftLeft128(in, 10), + variable_byte_shift_m128(in, 10))); + + EXPECT_TRUE(!diff128(zeroes128(), variable_byte_shift_m128(in, 16))); + EXPECT_TRUE(!diff128(zeroes128(), variable_byte_shift_m128(in, -16))); +} + } // namespace From 598f0565cf6014b820c64f1cd83661a2d455fabe Mon Sep 17 00:00:00 2001 From: Mohammad Abdul Awal Date: Thu, 24 Mar 2016 13:59:56 +1100 Subject: [PATCH 181/218] fdr: Remove python codegen, add safezones --- CMakeLists.txt | 2 +- src/fdr/CMakeLists.txt | 6 - src/fdr/autogen.py | 48 +- src/fdr/autogen_utils.py | 171 +------ src/fdr/base_autogen.py | 167 ------- src/fdr/fdr.c | 758 ++++++++++++++++++++++++++++- src/fdr/fdr_autogen.py | 564 --------------------- src/fdr/fdr_compile.cpp | 5 +- src/fdr/fdr_confirm_runtime.h | 194 ++++---- src/fdr/fdr_dump.cpp | 15 +- src/fdr/fdr_engine_description.cpp | 42 +- src/fdr/fdr_engine_description.h | 4 +- src/fdr/fdr_internal.h | 18 +- src/fdr/fdr_loadval.h | 9 +- src/fdr/teddy_autogen.py | 306 ++++++++++-- src/runtime.c | 12 +- unit/internal/fdr_flood.cpp | 33 +- 17 files changed, 1182 insertions(+), 1172 deletions(-) delete mode 100644 src/fdr/base_autogen.py delete mode 100755 src/fdr/fdr_autogen.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e118dd07..ad7bb3f92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -964,7 +964,7 @@ endif() add_library(hs STATIC ${hs_SRCS} $) add_dependencies(hs ragel_Parser) -add_dependencies(hs autogen_compiler autogen_teddy_compiler) +add_dependencies(hs autogen_teddy_compiler) if (NOT BUILD_SHARED_LIBS) install(TARGETS hs DESTINATION lib) diff --git a/src/fdr/CMakeLists.txt b/src/fdr/CMakeLists.txt index 1436c3fc6..7bbf82ffc 100644 --- a/src/fdr/CMakeLists.txt +++ 
b/src/fdr/CMakeLists.txt @@ -5,8 +5,6 @@ set(AUTOGEN_PY_FILES arch.py autogen.py autogen_utils.py - base_autogen.py - fdr_autogen.py teddy_autogen.py ) @@ -22,18 +20,14 @@ endfunction(fdr_autogen) #now build the functions fdr_autogen(runtime fdr_autogen.c) -fdr_autogen(compiler fdr_autogen_compiler.cpp) fdr_autogen(teddy_runtime teddy_autogen.c) fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp) set(fdr_GENERATED_SRC ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen.c - ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen.c ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp PARENT_SCOPE) set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE) include_directories(${CMAKE_CURRENT_BINARY_DIR}) - - diff --git a/src/fdr/autogen.py b/src/fdr/autogen.py index e5b4f39e7..a85104870 100755 --- a/src/fdr/autogen.py +++ b/src/fdr/autogen.py @@ -1,6 +1,6 @@ #!/usr/bin/python -# Copyright (c) 2015, Intel Corporation +# Copyright (c) 2015-2016, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -27,41 +27,9 @@ import sys from autogen_utils import * -from fdr_autogen import * from teddy_autogen import * from arch import * -# FDR setup - -# these are either produced - if the guard succeeds, or #defined to zeroes. -# either the function or the zero is fine in our array of function pointers - -def produce_fdr_runtimes(l): - for m in l: - m.produce_code() - -def produce_fdr_compiles(l): - print "void getFdrDescriptions(vector *out) {" - print " static const FDREngineDef defns[] = {" - for m in l: - m.produce_compile_call() - print " };" - print " out->clear();" - print " for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {" - print " out->push_back(FDREngineDescription(defns[i]));" - print " }" - print "}" - -def build_fdr_matchers(): - all_matchers = [ ] - strides = [ 1, 2, 4 ] - - common = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 } - for s in strides: - all_matchers += [ M3(stride = s, **common) ] - - return all_matchers - # teddy setup def build_teddy_matchers(): @@ -124,7 +92,8 @@ def make_fdr_function_pointers(matcher_list): typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a); static FDRFUNCTYPE funcs[] = { """ - all_funcs = ",\n".join([ " %s" % m.get_name() for m in matcher_list ]) + all_funcs = " fdr_engine_exec,\n" + all_funcs += ",\n".join([ " %s" % m.get_name() for m in matcher_list ]) print all_funcs print """ }; @@ -138,16 +107,11 @@ def assign_ids(matcher_list, next_id): # Main entry point -m = build_fdr_matchers() -next_id = assign_ids(m, 0) tm = build_teddy_matchers() -next_id = assign_ids(tm, next_id) -if sys.argv[1] == "compiler": - produce_fdr_compiles(m) -elif sys.argv[1] == "runtime": - produce_fdr_runtimes(m) +next_id = assign_ids(tm, 1) +if sys.argv[1] == "runtime": produce_teddy_headers(tm) - make_fdr_function_pointers(m+tm) + make_fdr_function_pointers(tm) elif sys.argv[1] == "teddy_runtime": produce_teddy_runtimes(tm) elif sys.argv[1] == "teddy_compiler": diff --git a/src/fdr/autogen_utils.py b/src/fdr/autogen_utils.py index e3679ad9a..3544bc7b6 100755 --- a/src/fdr/autogen_utils.py +++ b/src/fdr/autogen_utils.py @@ -1,6 +1,6 @@ #!/usr/bin/python -# Copyright (c) 2015, Intel Corporation +# Copyright (c) 2015-2016, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, 
are permitted provided that the following conditions are met: @@ -41,9 +41,6 @@ def get_name(self): def size_in_bytes(self): return self.size / 8 - def isSIMDOnIntel(self): - return False - def zero_expression(self): return "0" @@ -63,15 +60,9 @@ def highbits(self, n): def lowbit_mask(self, n): return self.constant_to_string(self.lowbits(n)) - def highbit_mask(self, n): - return self.constant_to_string(self.highbits(n)) - def lowbit_extract_expr(self, expr_string, n): return "(%s & %s)" % ( expr_string, self.lowbit_mask(n)) - def highbit_extract_expr(self, expr_string, n): - return "(%s >> %d)" % (expr_string, self.size - n) - def flip_lowbits_expr(self, expr_string, n): return "(%s ^ %s)" % ( expr_string, self.lowbit_mask(n)) @@ -90,36 +81,10 @@ def shift_expr(self, expr_string, n): else: return "(%s)" % (expr_string) - # code is: - # "normal" (always between buf and len) - the default - # "aligned" (means normal + aligned to a natural boundary) - # "cautious_forward" (means may go off the end of buf+len) - # "cautious_backwards" (means may go off the start of buf) - # "cautious_everywhere" (means may go off both) - - def load_expr_data(self, offset = 0, code = "normal", - base_string = "ptr", bounds_lo = "buf", bounds_hi = "buf + len"): - if code is "normal": - return "lv_%s(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi) - elif code is "aligned": - if self.size is 8: - fail_out("no aligned byte loads") - return "lv_%s_a(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi) - elif code is "cautious_forward": - return "lv_%s_cf(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi) - elif code is "cautious_backward": - return "lv_%s_cb(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi) - elif code is "cautious_everywhere": - return "lv_%s_ce(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi) - - class SIMDIntegerType(IntegerType): def __init__(self, size): IntegerType.__init__(self, size) - def isSIMDOnIntel(self): - return True - def zero_expression(self): return "zeroes128()" @@ -132,9 +97,6 @@ def lowbit_extract_expr(self, expr_string, n): tmpExpr = "movq(%s)" % expr_string return tmpType.lowbit_extract_expr(tmpExpr, n) - def highbit_extract_expr(self, expr_string, n): - fail_out("Unimplemented high bit extract on m128") - def bit_extract_expr(self, expr_string, low, high, flip): fail_out("Unimplemented bit extract on m128") @@ -146,9 +108,9 @@ def shift_expr(self, expr_string, n): if n <= -self.size or n >= self.size: return self.zero_expression() elif (n > 0): - return "_mm_slli_si128(%s, %s)" % (expr_string, n / 8) + return "byteShiftLeft128(%s, %s)" % (expr_string, n / 8) elif (n < 0): - return "_mm_srli_si128(%s, %s)" % (expr_string, -n / 8) + return "byteShiftRight128(%s, %s)" % (expr_string, -n / 8) else: return "(%s)" % (expr_string) @@ -156,130 +118,3 @@ def lowbit_mask(self, n): if n % 8 != 0: fail_out("Trying to make a lowbit mask in a m128 by a bit granular value") return self.shift_expr("ones128()", -(128 - n)) - -def getRequiredType(bits): - if bits == 128: - return SIMDIntegerType(bits) - for b in [ 8, 16, 32, 64]: - if (bits <= b): - return IntegerType(b) - return None - -class IntegerVariable: - def __init__(self, name, type): - self.name = name - self.type = type - - def gen_initializer_stmt(self, initialization_string = None): - if initialization_string: - return "%s %s = %s;" % (self.type.get_name(), self.name, 
initialization_string) - else: - return "%s %s;" % (self.type.get_name(), self.name) - - -class Step: - def __init__(self, context, offset = 0): - self.context = context - self.matcher = context.matcher - self.offset = offset - self.latency = 1 - self.dependency_list = [] - self.latest = None - self.context.add_step(self) - - # return a string, complete with indentation - def emit(self): - indent = " " * (self.offset*2 + self.matcher.default_body_indent) - s = "\n".join( [ indent + line for line in self.val.split("\n")] ) - if self.latest: - s += " // " + str(self.debug_step) + " L" + str(self.latency) + " LTST:%d" % self.latest - if self.dependency_list: - s += " Derps: " - for (d,l) in self.dependency_list: - s += "%d/%d " % (d.debug_step,l) - return s - - def add_dependency(self, step, anti_dependency = False, output_dependency = False): - if anti_dependency or output_dependency: - self.dependency_list += [ (step, 1) ] - else: - self.dependency_list += [ (step, step.latency) ] - - def nv(self, type, var_name): - return self.context.new_var(self, type, var_name) - - def gv(self, var_name, reader = True, writer = False): - return self.context.get_var(self, var_name, reader = reader, writer = writer) - -# utility steps, generic - -class LabelStep(Step): - def __init__(self, context, offset = 0, label_prefix = "off"): - Step.__init__(self, context, offset) - self.val = "%s%d: UNUSED;" % (label_prefix, offset) - -class OpenScopeStep(Step): - def __init__(self, context, offset = 0): - Step.__init__(self, context, offset) - self.val = "{" - -class CloseScopeStep(Step): - def __init__(self, context, offset = 0): - Step.__init__(self, context, offset) - self.val = "}" - - -class CodeGenContext: - def __init__(self, matcher): - self.vars = {} - self.steps = [] - self.ctr = 0 - self.matcher = matcher - self.var_writer = {} # var to a single writer - self.var_readers = {} # var to a list of all the readers that read the last value - - def new_var(self, step, type, var_name): - var = IntegerVariable(var_name, type) - self.vars[var_name] = var - self.var_writer[var_name] = step - return var - - def get_var(self, step, var_name, reader = True, writer = False): - if reader: - writer_step = self.var_writer[var_name] - if writer_step: - step.add_dependency(writer_step) - self.var_readers.setdefault(var_name, []).append(step) - if writer and not reader: - if self.var_writer[var_name]: - step.add_dependency(self.var_writer[var_name], output_dependency = True) - if writer: - if self.var_readers.has_key(var_name): - for reader in [ r for r in self.var_readers[var_name] if r is not step ]: - step.add_dependency(reader, anti_dependency = True) - self.var_readers[var_name] = [] - self.var_writer[var_name] = step - return self.vars[var_name] - - def add_step(self, step): - self.steps += [ step ] - step.debug_step = self.ctr - self.ctr += 1 - - def dontschedule(self, finals): - return "\n".join( [ s.emit() for s in self.steps ] ) - - def schedule(self, finals): - for f in finals: - f.latest = f.latency - worklist = finals - while worklist: - current = worklist[0] - worklist = worklist[1:] - for (dep, lat) in current.dependency_list: - if dep.latest is None or dep.latest < (current.latest + dep.latency): - dep.latest = current.latest + lat - if dep not in worklist: - worklist += [ dep ] - self.steps.sort(reverse = True, key = lambda s : s.latest) - return "\n".join( [ s.emit() for s in self.steps ] ) diff --git a/src/fdr/base_autogen.py b/src/fdr/base_autogen.py deleted file mode 100644 index c9cf1b375..000000000 
--- a/src/fdr/base_autogen.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/python - -# Copyright (c) 2015, Intel Corporation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of Intel Corporation nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import sys -from autogen_utils import * -from base_autogen import * -from string import Template - -class MatcherBase: - - def __init__(self): - pass - - def get_name(self): - return "fdr_exec_%03d" % self.id - - def produce_header(self, visible, header_only = False): - s = "" - if not visible: - s += "static never_inline" - s += """ -hwlm_error_t %s(UNUSED const struct FDR *fdr, - UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name() - if header_only: - s += ";" - else: - s += "{" - s += "\n" - return s - - def produce_guard(self): - print self.arch.get_guard() - - def produce_zero_alternative(self): - print """ -#else -#define %s 0 -#endif -""" % self.get_name() - - # trivial function for documentation/modularity - def close_guard(self): - print "#endif" - - def produce_common_declarations(self): - return """ - const u8 * buf = a->buf; - const size_t len = a->len; - const u8 * ptr = buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t * control = &controlVal; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 * tryFloodDetect = a->firstFloodDetect; - UNUSED u32 bit, bitRem, confSplit, idx; - u32 byte, cf; - const struct FDRConfirm *fdrc; - u32 last_match = (u32)-1; -""" - - def produce_continue_check(self): - return """if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { - *a->groups = controlVal; - return HWLM_TERMINATED; -} -""" - def produce_flood_check(self): - return """ - if (P0(ptr > tryFloodDetect)) { - tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes); - if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { - *a->groups = controlVal; - return HWLM_TERMINATED; - } - } -""" - - def produce_footer(self): - return """ - *a->groups = controlVal; - return HWLM_SUCCESS; -} -""" - - def produce_confirm_base(self, conf_var_name, conf_var_size, offset, cautious, enable_confirmless, do_bailout = False): - if cautious: - caution_string = 
"VECTORING" - else: - caution_string = "NOT_CAUTIOUS" - conf_split_mask = IntegerType(32).constant_to_string( - self.conf_top_level_split - 1) - if enable_confirmless: - quick_check_string = """ - if (!fdrc->mult) { - u32 id = fdrc->nBitsOrSoleID; - if ((last_match == id) && (fdrc->flags & NoRepeat)) - continue; - last_match = id; - controlVal = a->cb(ptr+byte-buf, ptr+byte-buf, id, a->ctxt); - continue; - } """ - else: - quick_check_string = "" - if do_bailout: - bailout_string = """ - if ((ptr + byte < buf + a->start_offset) || (ptr + byte >= buf + len)) continue;""" - else: - bailout_string = "" - - return Template(""" -if (P0(!!$CONFVAR)) { - do { - bit = findAndClearLSB_$CONFVAR_SIZE(&$CONFVAR); - byte = bit / $NUM_BUCKETS + $OFFSET; - bitRem = bit % $NUM_BUCKETS; - $BAILOUT_STRING - confSplit = *(ptr+byte) & $SPLIT_MASK; - idx = confSplit * $NUM_BUCKETS + bitRem; - cf = confBase[idx]; - if (!cf) - continue; - fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf); - if (!(fdrc->groups & *control)) - continue; - $QUICK_CHECK_STRING - confWithBit(fdrc, a, ptr - buf + byte, $CAUTION_STRING, $CONF_PULL_BACK, control, &last_match); - } while(P0(!!$CONFVAR)); - if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { - *a->groups = controlVal; - return HWLM_TERMINATED; - } -}""").substitute(CONFVAR = conf_var_name, - CONFVAR_SIZE = conf_var_size, - NUM_BUCKETS = self.num_buckets, - OFFSET = offset, - SPLIT_MASK = conf_split_mask, - QUICK_CHECK_STRING = quick_check_string, - BAILOUT_STRING = bailout_string, - CAUTION_STRING = caution_string, - CONF_PULL_BACK = self.conf_pull_back) - - -def indent(block, depth): - return "\n".join([ (" " * (4*depth)) + line for line in block.splitlines() ] ) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index f83a42652..c955680bf 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,28 +26,752 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "util/simd_utils.h" - -#define P0(cnd) unlikely(cnd) - #include "fdr.h" +#include "fdr_confirm.h" +#include "fdr_confirm_runtime.h" #include "fdr_internal.h" +#include "fdr_loadval.h" +#include "fdr_streaming_runtime.h" +#include "flood_runtime.h" #include "teddy_internal.h" +#include "util/simd_utils.h" +#include "util/simd_utils_ssse3.h" -#include "flood_runtime.h" +/** \brief number of bytes processed in each iteration */ +#define ITER_BYTES 16 + +/** \brief total zone buffer size */ +#define ZONE_TOTAL_SIZE 64 + +/** \brief maximum number of allowed zones */ +#define ZONE_MAX 3 + +/** \brief zone information. + * + * Zone represents a region of data to scan in FDR. + * + * The incoming buffer is to split in multiple zones to ensure two properties: + * 1: that we can read 8? bytes behind to generate a hash safely + * 2: that we can read the byte after the current byte (domain > 8) + */ +struct zone { + /** \brief copied buffer, used only when it is a boundary zone. */ + u8 ALIGN_CL_DIRECTIVE buf[ZONE_TOTAL_SIZE]; + + /** \brief shift amount for fdr state to avoid unwanted match. */ + u8 shift; + + /** \brief if boundary zone, start points into the zone buffer after the + * pre-padding. Otherwise, points to the main buffer, appropriately. */ + const u8 *start; + + /** \brief if boundary zone, end points to the end of zone. 
Otherwise,
+     * points to the main buffer, appropriately. */
+    const u8 *end;
+
+    /** \brief the amount to adjust to go from a pointer in the zones region
+     * (between start and end) to a pointer in the original data buffer. */
+    ptrdiff_t zone_pointer_adjust;
+
+    /** \brief firstFloodDetect from FDR_Runtime_Args for non-boundary zones,
+     * otherwise end of the zone buf. floodPtr always points inside the same
+     * buffer as the start pointer. */
+    const u8 *floodPtr;
+};
+
+static
+const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = {
+    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+      0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+      0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+      0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+      0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+      0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00 },
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 },
+    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
+};
+
+/* generates an initial state mask based on the last byte-ish of history
+ * rather than being all accepting. If there is no history to consider, the
+ * state is generated based on the minimum length of each bucket in order to
+ * prevent confirms.
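+ *
+ * (Illustrative example: if every literal in bucket 3 is at least 4 bytes
+ * long, fdr->start keeps bucket 3's "no match yet" bits set for the first 3
+ * byte positions, so a confirm for that bucket cannot fire before enough
+ * bytes have actually been seen.)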
+ */
+static really_inline
+m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft,
+                  const struct zone *z) {
+    m128 s;
+    if (len_history) {
+        /* +1: the zones ensure that we can read the byte at z->end */
+        u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
+        tmp &= fdr->domainMask;
+        s = *((const m128 *)ft + tmp);
+        s = shiftRight8Bits(s);
+    } else {
+        s = fdr->start;
+    }
+    return s;
+}
+
+static really_inline
+void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
+                       u64a domain_mask_adjusted, const u8 *ft, u64a *conf0,
+                       u64a *conf8, m128 *s) {
+    /* +1: the zones ensure that we can read the byte at z->end */
+
+    u64a current_data_0;
+    u64a current_data_8;
+
+    current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr);
+    u64a v7 = (lv_u16(itPtr + 7, start_ptr, end_ptr + 1) << 1) &
+              domain_mask_adjusted;
+    u64a v0 = (current_data_0 << 1) & domain_mask_adjusted;
+    u64a v1 = (current_data_0 >> 7) & domain_mask_adjusted;
+    u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted;
+    u64a v3 = (current_data_0 >> 23) & domain_mask_adjusted;
+    u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted;
+    u64a v5 = (current_data_0 >> 39) & domain_mask_adjusted;
+    u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted;
+    current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr);
+    u64a v15 = (lv_u16(itPtr + 15, start_ptr, end_ptr + 1) << 1) &
+               domain_mask_adjusted;
+    u64a v8 = (current_data_8 << 1) & domain_mask_adjusted;
+    u64a v9 = (current_data_8 >> 7) & domain_mask_adjusted;
+    u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted;
+    u64a v11 = (current_data_8 >> 23) & domain_mask_adjusted;
+    u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted;
+    u64a v13 = (current_data_8 >> 39) & domain_mask_adjusted;
+    u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted;
+
+    m128 st0 = *(const m128 *)(ft + v0*8);
+    m128 st1 = *(const m128 *)(ft + v1*8);
+    m128 st2 = *(const m128 *)(ft + v2*8);
+    m128 st3 = *(const m128 *)(ft + v3*8);
+    m128 st4 = *(const m128 *)(ft + v4*8);
+    m128 st5 = *(const m128 *)(ft + v5*8);
+    m128 st6 = *(const m128 *)(ft + v6*8);
+    m128 st7 = *(const m128 *)(ft + v7*8);
+    m128 st8 = *(const m128 *)(ft + v8*8);
+    m128 st9 = *(const m128 *)(ft + v9*8);
+    m128 st10 = *(const m128 *)(ft + v10*8);
+    m128 st11 = *(const m128 *)(ft + v11*8);
+    m128 st12 = *(const m128 *)(ft + v12*8);
+    m128 st13 = *(const m128 *)(ft + v13*8);
+    m128 st14 = *(const m128 *)(ft + v14*8);
+    m128 st15 = *(const m128 *)(ft + v15*8);
+
+    st1 = byteShiftLeft128(st1, 1);
+    st2 = byteShiftLeft128(st2, 2);
+    st3 = byteShiftLeft128(st3, 3);
+    st4 = byteShiftLeft128(st4, 4);
+    st5 = byteShiftLeft128(st5, 5);
+    st6 = byteShiftLeft128(st6, 6);
+    st7 = byteShiftLeft128(st7, 7);
+    st9 = byteShiftLeft128(st9, 1);
+    st10 = byteShiftLeft128(st10, 2);
+    st11 = byteShiftLeft128(st11, 3);
+    st12 = byteShiftLeft128(st12, 4);
+    st13 = byteShiftLeft128(st13, 5);
+    st14 = byteShiftLeft128(st14, 6);
+    st15 = byteShiftLeft128(st15, 7);
+
+    *s = or128(*s, st0);
+    *s = or128(*s, st1);
+    *s = or128(*s, st2);
+    *s = or128(*s, st3);
+    *s = or128(*s, st4);
+    *s = or128(*s, st5);
+    *s = or128(*s, st6);
+    *s = or128(*s, st7);
+    *conf0 = movq(*s);
+    *s = byteShiftRight128(*s, 8);
+    *conf0 ^= ~0ULL;
+
+    *s = or128(*s, st8);
+    *s = or128(*s, st9);
+    *s = or128(*s, st10);
+    *s = or128(*s, st11);
+    *s = or128(*s, st12);
+    *s = or128(*s, st13);
+    *s = or128(*s, st14);
+    *s = or128(*s, st15);
+    *conf8 = movq(*s);
+    *s = byteShiftRight128(*s, 8);
+    *conf8 ^= ~0ULL;
+}
+
+static really_inline
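+/* The stride-2 and stride-4 variants below mirror get_conf_stride_1, but
+ * only perform the data-driven reach-table lookups at every 2nd (v0, v2,
+ * ..., v14) or every 4th (v0, v4, v8, v12) byte offset, halving or
+ * quartering the shift-or work per 16-byte iteration. */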
+void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
+                       u64a domain_mask_adjusted, const u8 *ft, u64a *conf0,
+                       u64a *conf8, m128 *s) {
+    u64a current_data_0;
+    u64a current_data_8;
+
+    current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr);
+    u64a v0 = (current_data_0 << 1) & domain_mask_adjusted;
+    u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted;
+    u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted;
+    u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted;
+    current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr);
+    u64a v8 = (current_data_8 << 1) & domain_mask_adjusted;
+    u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted;
+    u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted;
+    u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted;
+
+    m128 st0 = *(const m128 *)(ft + v0*8);
+    m128 st2 = *(const m128 *)(ft + v2*8);
+    m128 st4 = *(const m128 *)(ft + v4*8);
+    m128 st6 = *(const m128 *)(ft + v6*8);
+    m128 st8 = *(const m128 *)(ft + v8*8);
+    m128 st10 = *(const m128 *)(ft + v10*8);
+    m128 st12 = *(const m128 *)(ft + v12*8);
+    m128 st14 = *(const m128 *)(ft + v14*8);
+
+    st2 = byteShiftLeft128(st2, 2);
+    st4 = byteShiftLeft128(st4, 4);
+    st6 = byteShiftLeft128(st6, 6);
+    st10 = byteShiftLeft128(st10, 2);
+    st12 = byteShiftLeft128(st12, 4);
+    st14 = byteShiftLeft128(st14, 6);
+
+    *s = or128(*s, st0);
+    *s = or128(*s, st2);
+    *s = or128(*s, st4);
+    *s = or128(*s, st6);
+    *conf0 = movq(*s);
+    *s = byteShiftRight128(*s, 8);
+    *conf0 ^= ~0ULL;
+
+    *s = or128(*s, st8);
+    *s = or128(*s, st10);
+    *s = or128(*s, st12);
+    *s = or128(*s, st14);
+    *conf8 = movq(*s);
+    *s = byteShiftRight128(*s, 8);
+    *conf8 ^= ~0ULL;
+}
+
+static really_inline
+void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
+                       u64a domain_mask_adjusted, const u8 *ft, u64a *conf0,
+                       u64a *conf8, m128 *s) {
+    u64a current_data_0;
+    u64a current_data_8;
+
+    current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr);
+    u64a v0 = (current_data_0 << 1) & domain_mask_adjusted;
+    u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted;
+    current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr);
+    u64a v8 = (current_data_8 << 1) & domain_mask_adjusted;
+    u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted;
+
+    m128 st0 = *(const m128 *)(ft + v0*8);
+    m128 st4 = *(const m128 *)(ft + v4*8);
+    m128 st8 = *(const m128 *)(ft + v8*8);
+    m128 st12 = *(const m128 *)(ft + v12*8);
+
+    st4 = byteShiftLeft128(st4, 4);
+    st12 = byteShiftLeft128(st12, 4);
+
+    *s = or128(*s, st0);
+    *s = or128(*s, st4);
+    *conf0 = movq(*s);
+    *s = byteShiftRight128(*s, 8);
+    *conf0 ^= ~0ULL;
+
+    *s = or128(*s, st8);
+    *s = or128(*s, st12);
+    *conf8 = movq(*s);
+    *s = byteShiftRight128(*s, 8);
+    *conf8 ^= ~0ULL;
+}
+
+static really_inline
+void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal,
+                    const u32 *confBase, const struct FDR_Runtime_Args *a,
+                    const u8 *ptr, hwlmcb_rv_t *control, u32 *last_match_id,
+                    struct zone *z) {
+    const u8 bucket = 8;
+    const u8 pullback = 1;
+
+    if (likely(!*conf)) {
+        return;
+    }
+
+    /* ptr is currently referring to a location in the zone's buffer, we also
+     * need a pointer in the original, main buffer for the final string compare.
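+     * (For a boundary zone whose copied data ends at z_end,
+     * zone_pointer_adjust was set to end - z_end, so ptr +
+     * zone_pointer_adjust recovers the corresponding location in the
+     * caller's buffer.)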
+     */
+    const u8 *ptr_main = (const u8 *)((uintptr_t)ptr + z->zone_pointer_adjust);
+
+    const u8 *confLoc = ptr;
+
+    do {
+        u32 bit = findAndClearLSB_64(conf);
+        u32 byte = bit / bucket + offset;
+        u32 bitRem = bit % bucket;
+        u32 confSplit = *(ptr + byte);
+        u32 idx = confSplit * bucket + bitRem;
+        u32 cf = confBase[idx];
+        if (!cf) {
+            continue;
+        }
+        const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
+                                        ((const u8 *)confBase + cf);
+        if (!(fdrc->groups & *control)) {
+            continue;
+        }
+        if (!fdrc->mult) {
+            u32 id = fdrc->nBitsOrSoleID;
+            if ((*last_match_id == id) && (fdrc->flags & NoRepeat)) {
+                continue;
+            }
+            *last_match_id = id;
+            *controlVal = a->cb(ptr_main + byte - a->buf,
+                                ptr_main + byte - a->buf, id, a->ctxt);
+            continue;
+        }
+        u64a confVal = *(const u64a *)(confLoc + byte - sizeof(u64a));
+        confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback,
+                    control, last_match_id, confVal);
+    } while (unlikely(!!*conf));
+}
+
+static really_inline
+void dumpZoneInfo(UNUSED struct zone *z, UNUSED size_t zone_id) {
+#ifdef DEBUG
+    DEBUG_PRINTF("zone: zone=%zu, bufPtr=%p\n", zone_id, z->buf);
+    DEBUG_PRINTF("zone: startPtr=%p, endPtr=%p, shift=%u\n",
+                 z->start, z->end, z->shift);
+    DEBUG_PRINTF("zone: zone_pointer_adjust=%zd, floodPtr=%p\n",
+                 z->zone_pointer_adjust, z->floodPtr);
+    DEBUG_PRINTF("zone buf:");
+    for (size_t i = 0; i < ZONE_TOTAL_SIZE; i++) {
+        if (i % 8 == 0) {
+            printf("_");
+        }
+        if (z->buf[i]) {
+            printf("%02x", z->buf[i]);
+        } else {
+            printf("..");
+        }
+    }
+    printf("\n");
+#endif
+}
+
+/**
+ * \brief Updates attributes for non-boundary region zone.
+ */
+static really_inline
+void createMainZone(const u8 *flood, const u8 *begin, const u8 *end,
+                    struct zone *z) {
+    z->zone_pointer_adjust = 0; /* zone buffer is the main buffer */
+    z->start = begin;
+    z->end = end;
+    z->floodPtr = flood;
+    z->shift = 0;
+}
+
+/**
+ * \brief Create zone for short cases (<= ITER_BYTES).
+ *
+ * For this case we need to copy everything into the zone's internal buffer.
+ *
+ * We need to ensure that we run over real data if it exists (in history or
+ * before zone begin). We also need to ensure 8 bytes before any data being
+ * matched can be read (to perform a conf hash).
+ *
+ * We also need to ensure that the data at z->end can be read.
+ *
+ * Hence, the zone consists of:
+ * 16 bytes of history,
+ * 1 - 24 bytes of data from the buffer (ending at end),
+ * 1 byte of final padding
+ */
+static really_inline
+void createShortZone(const u8 *buf, const u8 *hend, const u8 *begin,
+                     const u8 *end, struct zone *z) {
+    /* the floodPtr for BOUNDARY zones is set to the end of the zone buf to
+     * avoid the flood checks within the boundary zone. */
+    z->floodPtr = z->buf + ZONE_TOTAL_SIZE;
+
+    ptrdiff_t z_len = end - begin;
+    assert(z_len > 0);
+    assert(z_len <= ITER_BYTES);
+
+    z->shift = ITER_BYTES - z_len; /* ignore bytes outside region specified */
+
+    static const size_t ZONE_SHORT_DATA_OFFSET = 16; /* after history */
+
+    /* we are guaranteed to always have 16 initialised bytes at the end of
+     * the history buffer (they may be garbage coming from the stream state
+     * preceding hbuf, but bytes that don't correspond to actual history
+     * shouldn't affect computations). */
+    *(m128 *)z->buf = loadu128(hend - sizeof(m128));
+
+    /* The amount of data we have to copy from main buffer.
*/
+    size_t copy_len = MIN((size_t)(end - buf),
+                          ITER_BYTES + sizeof(CONF_TYPE));
+
+    u8 *zone_data = z->buf + ZONE_SHORT_DATA_OFFSET;
+    switch (copy_len) {
+    case 1:
+        *zone_data = *(end - 1);
+        break;
+    case 2:
+        *(u16 *)zone_data = unaligned_load_u16(end - 2);
+        break;
+    case 3:
+        *(u16 *)zone_data = unaligned_load_u16(end - 3);
+        *(zone_data + 2) = *(end - 1);
+        break;
+    case 4:
+        *(u32 *)zone_data = unaligned_load_u32(end - 4);
+        break;
+    case 5:
+    case 6:
+    case 7:
+        /* perform copy with 2 overlapping 4-byte chunks from buf. */
+        *(u32 *)zone_data = unaligned_load_u32(end - copy_len);
+        unaligned_store_u32(zone_data + copy_len - sizeof(u32),
+                            unaligned_load_u32(end - sizeof(u32)));
+        break;
+    case 8:
+        *(u64a *)zone_data = unaligned_load_u64a(end - 8);
+        break;
+    case 9:
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+    case 14:
+    case 15:
+        /* perform copy with 2 overlapping 8-byte chunks from buf. */
+        *(u64a *)zone_data = unaligned_load_u64a(end - copy_len);
+        unaligned_store_u64a(zone_data + copy_len - sizeof(u64a),
+                             unaligned_load_u64a(end - sizeof(u64a)));
+        break;
+    case 16:
+        /* copy 16 bytes from buf. */
+        *(m128 *)zone_data = loadu128(end - 16);
+        break;
+    default:
+        assert(copy_len <= sizeof(m128) + sizeof(u64a));
+
+        /* perform copy with (potentially overlapping) 8-byte and 16-byte
+         * chunks. */
+        *(u64a *)zone_data = unaligned_load_u64a(end - copy_len);
+        storeu128(zone_data + copy_len - sizeof(m128),
+                  loadu128(end - sizeof(m128)));
+        break;
+    }
+
+    /* set the start and end location of the zone buf
+     * to be scanned */
+    u8 *z_end = z->buf + ZONE_SHORT_DATA_OFFSET + copy_len;
+    assert(ZONE_SHORT_DATA_OFFSET + copy_len >= ITER_BYTES);
+
+    /* copy the post-padding byte; this is required for domain > 8 due to
+     * overhang */
+    *z_end = 0;
+
+    z->end = z_end;
+    z->start = z_end - ITER_BYTES;
+    z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
+    assert(z->start + z->shift == z_end - z_len);
+}
+
+/**
+ * \brief Create a zone for the start region.
+ *
+ * This function requires that there is > ITER_BYTES of data in the buffer to
+ * scan. The start zone itself is always responsible for scanning exactly
+ * ITER_BYTES of data - there are no warmup/junk bytes scanned.
+ *
+ * This zone ensures that the byte at z->end can be read and corresponds to
+ * the next byte of data.
+ *
+ * 8 bytes of history data are provided before z->start to allow proper hash
+ * generation in streaming mode. If buf != begin, up to 8 bytes of data
+ * prior to begin are also provided.
+ *
+ * Although we are not interested in bare literals which start before begin
+ * if buf != begin, lookarounds associated with the literal may require
+ * the data prior to begin for hash purposes.
+ */
+static really_inline
+void createStartZone(const u8 *buf, const u8 *hend, const u8 *begin,
+                     struct zone *z) {
+    assert(ITER_BYTES == sizeof(m128));
+    assert(sizeof(CONF_TYPE) == 8);
+    static const size_t ZONE_START_BEGIN = sizeof(CONF_TYPE);
+
+    const u8 *end = begin + ITER_BYTES;
+
+    /* set floodPtr to the end of zone buf to avoid checks in start zone */
+    z->floodPtr = z->buf + ZONE_TOTAL_SIZE;
+
+    z->shift = 0; /* we are processing ITER_BYTES of real data */
+
+    /* we are guaranteed to always have 16 initialised bytes at the end of the
+     * history buffer (they may be garbage coming from the stream state
+     * preceding hbuf, but bytes that don't correspond to actual history
+     * shouldn't affect computations).
However,
+     * for start zones, history is only required for conf hash purposes so we
+     * only need 8 bytes */
+    unaligned_store_u64a(z->buf, unaligned_load_u64a(hend - sizeof(u64a)));
+
+    /* The amount of data we have to copy from main buffer. */
+    size_t copy_len = MIN((size_t)(end - buf),
+                          ITER_BYTES + sizeof(CONF_TYPE));
+    assert(copy_len >= 16);
+
+    /* copy the post-padding byte; this is required for domain > 8 due to
+     * overhang. The start zone requires that there is data after the zone so
+     * it is safe to dereference end */
+    z->buf[ZONE_START_BEGIN + copy_len] = *end;
+
+    /* set the start and end location of the zone buf to be scanned */
+    u8 *z_end = z->buf + ZONE_START_BEGIN + copy_len;
+    z->end = z_end;
+    z->start = z_end - ITER_BYTES;
+
+    /* copy the first 8 bytes of the valid region */
+    unaligned_store_u64a(z->buf + ZONE_START_BEGIN,
+                         unaligned_load_u64a(end - copy_len));
+
+    /* copy the last 16 bytes, may overlap with the previous 8 byte write */
+    storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));
+
+    z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
+}
+
+/**
+ * \brief Create a zone for the end region.
+ *
+ * This function requires that there is > ITER_BYTES of data in the buffer to
+ * scan. The end zone, however, is only responsible for scanning the
+ * <= ITER_BYTES rump of data. The end zone is required to handle a full
+ * ITER_BYTES iteration as the main loop cannot handle the last byte of the
+ * buffer.
+ *
+ * This zone ensures that the byte at z->end can be read by filling it with a
+ * padding character.
+ *
+ * Up to 8 bytes of data prior to begin are also provided for the purposes of
+ * generating hashes. History is not copied, as all locations which require
+ * history for generating a hash are the responsibility of the start zone.
+ */
+static really_inline
+void createEndZone(const u8 *buf, const u8 *begin, const u8 *end,
+                   struct zone *z) {
+    /* the floodPtr for BOUNDARY zones is set to the end of the zone buf to
+     * avoid the flood checks within the boundary zone. */
+    z->floodPtr = z->buf + ZONE_TOTAL_SIZE;
+
+    ptrdiff_t z_len = end - begin;
+    assert(z_len > 0);
+    assert(z_len <= ITER_BYTES);
+
+    z->shift = ITER_BYTES - z_len;
+
+    /* The amount of data we have to copy from main buffer. */
+    size_t copy_len = MIN((size_t)(end - buf),
+                          ITER_BYTES + sizeof(CONF_TYPE));
+    assert(copy_len >= 16);
+
+    /* copy the post-padding byte; this is required for domain > 8 due to
+     * overhang */
+    z->buf[copy_len] = 0;
+
+    /* set the start and end location of the zone buf
+     * to be scanned */
+    u8 *z_end = z->buf + copy_len;
+    z->end = z_end;
+    z->start = z_end - ITER_BYTES;
+    assert(z->start + z->shift == z_end - z_len);
+
+    /* copy the first 8 bytes of the valid region */
+    unaligned_store_u64a(z->buf, unaligned_load_u64a(end - copy_len));
+
+    /* copy the last 16 bytes, may overlap with the previous 8 byte write */
+    storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));
+
+    z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
+}
+
+/**
+ * \brief Prepare zones.
+ *
+ * This function prepares zones with the actual buffer and some padded bytes.
+ * The actual ITER_BYTES bytes in each zone are preceded by main buf and/or
+ * history buf and succeeded by padded bytes possibly from main buf, if
+ * available.
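+ *
+ * For a long scan the resulting split looks like:
+ *
+ *   [ start zone: ITER_BYTES ][ main zone: n * ITER_BYTES ][ end zone ]
+ *
+ * where only the boundary (start/end) zones copy data into their private
+ * zone buffers; the main zone scans the caller's buffer in place.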
+ */
+static really_inline
+size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
+                    size_t start, const u8 *flood, struct zone *zoneArr) {
+    const u8 *ptr = buf + start;
+    size_t remaining = len - start;
+
+    if (remaining <= ITER_BYTES) {
+        /* enough bytes to make only one zone */
+        createShortZone(buf, hend, ptr, buf + len, &zoneArr[0]);
+        return 1;
+    }
+
+    /* enough bytes to make more than one zone */
+
+    size_t numZone = 0;
+    createStartZone(buf, hend, ptr, &zoneArr[numZone++]);
+    ptr += ITER_BYTES;
+
+    assert(ptr < buf + len);
+
+    /* find maximum buffer location that the main zone can scan
+     * - must be a multiple of ITER_BYTES, and
+     * - cannot contain the last byte (due to overhang)
+     */
+    const u8 *main_end = buf + start + ROUNDDOWN_N(len - start - 1, ITER_BYTES);
+    assert(main_end >= ptr);
+
+    /* create a zone if multiple of ITER_BYTES are found */
+    if (main_end != ptr) {
+        createMainZone(flood, ptr, main_end, &zoneArr[numZone++]);
+        ptr = main_end;
+    }
+    /* create a zone with rest of the data from the main buffer */
+    createEndZone(buf, ptr, buf + len, &zoneArr[numZone++]);
+    return numZone;
+}
+
+#define INVALID_MATCH_ID (~0U)
+
+#define FDR_MAIN_LOOP(zz, s, get_conf_fn)                                   \
+    do {                                                                    \
+        const u8 *tryFloodDetect = zz->floodPtr;                            \
+        const u8 *start_ptr = zz->start;                                    \
+        const u8 *end_ptr = zz->end;                                        \
+                                                                            \
+        for (const u8 *itPtr = start_ptr; itPtr + ITER_BYTES <= end_ptr;   \
+             itPtr += ITER_BYTES) {                                         \
+            if (unlikely(itPtr > tryFloodDetect)) {                         \
+                tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect,\
+                                             &floodBackoff, &controlVal,    \
+                                             ITER_BYTES);                   \
+                if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) {      \
+                    return HWLM_TERMINATED;                                 \
+                }                                                           \
+            }                                                               \
+            __builtin_prefetch(itPtr + (ITER_BYTES*4));                     \
+            u64a conf0;                                                     \
+            u64a conf8;                                                     \
+            get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted,    \
+                        ft, &conf0, &conf8, &s);                            \
+            do_confirm_fdr(&conf0, 0, &controlVal, confBase, a, itPtr,      \
+                           control, &last_match_id, zz);                    \
+            do_confirm_fdr(&conf8, 8, &controlVal, confBase, a, itPtr,      \
+                           control, &last_match_id, zz);                    \
+            if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) {          \
+                return HWLM_TERMINATED;                                     \
+            }                                                               \
+        } /* end for loop */                                                \
+    } while (0)                                                             \
+
+static never_inline
+hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
+                             const struct FDR_Runtime_Args *a) {
+    hwlmcb_rv_t controlVal = *a->groups;
+    hwlmcb_rv_t *control = &controlVal;
+    u32 floodBackoff = FLOOD_BACKOFF_START;
+    u32 last_match_id = INVALID_MATCH_ID;
+    u64a domain_mask_adjusted = fdr->domainMask << 1;
+    u8 stride = fdr->stride;
+    const u8 *ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
+    const u32 *confBase = (const u32 *)(ft + fdr->tabSize);
+    struct zone zones[ZONE_MAX];
+    assert(fdr->domain > 8 && fdr->domain < 16);
+
+    size_t numZone = prepareZones(a->buf, a->len,
+                                  a->buf_history + a->len_history,
+                                  a->start_offset, a->firstFloodDetect, zones);
+    assert(numZone <= ZONE_MAX);
+    m128 state = getInitState(fdr, a->len_history, ft, &zones[0]);
+
+    for (size_t curZone = 0; curZone < numZone; curZone++) {
+        struct zone *z = &zones[curZone];
+        dumpZoneInfo(z, curZone);
+
+        /* When a zone contains less data than is processed in an iteration
+         * of FDR_MAIN_LOOP(), we need to scan over some extra data.
+         *
+         * We have chosen to scan this extra data at the start of the
+         * iteration. The extra data is either data we have already scanned
+         * or garbage (if it is earlier than offset 0).
+         *
+         * As a result we need to shift the incoming state back so that it
+         * will properly line up with the data being scanned.
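+         * (e.g. an end zone holding 10 real bytes has shift == 6: the state
+         * is shifted back 6 positions and OR-ed with zone_or_mask[6], whose
+         * first 6 bytes are 0xff, so the already-scanned bytes cannot
+         * produce fresh confirms.)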
+ * + * We also need to forbid reporting any matches in the data being + * rescanned as they have already been reported (or are over garbage but + * later stages should also provide that safety guarantee). + */ + + u8 shift = z->shift; + + state = variable_byte_shift_m128(state, shift); + + state = or128(state, load128(zone_or_mask[shift])); + + switch (stride) { + case 1: + FDR_MAIN_LOOP(z, state, get_conf_stride_1); + break; + case 2: + FDR_MAIN_LOOP(z, state, get_conf_stride_2); + break; + case 4: + FDR_MAIN_LOOP(z, state, get_conf_stride_4); + break; + default: + break; + } + } + + return HWLM_SUCCESS; +} -#include "fdr_confirm.h" -#include "fdr_confirm_runtime.h" -#include "fdr_streaming_runtime.h" -#include "fdr_loadval.h" #include "fdr_autogen.c" #define FAKE_HISTORY_SIZE 16 static const u8 fake_history[FAKE_HISTORY_SIZE]; -hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, size_t start, - HWLMCallback cb, void *ctxt, hwlm_group_t groups) { - +hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, + size_t start, HWLMCallback cb, void *ctxt, + hwlm_group_t groups) { const struct FDR_Runtime_Args a = { buf, len, @@ -73,7 +797,7 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, size_t st hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, - hwlm_group_t groups, u8 * stream_state) { + hwlm_group_t groups, u8 *stream_state) { struct FDR_Runtime_Args a = { buf, len, @@ -86,9 +810,9 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, ctxt, &groups, nextFloodDetect(buf, len, FLOOD_BACKOFF_START), - hbuf ? CONF_LOADVAL_CALL_CAUTIOUS(hbuf + hlen - 8, hbuf, hbuf + hlen) - : (u64a)0 - + /* we are guaranteed to always have 16 initialised bytes at the end of + * the history buffer (they may be garbage). */ + hbuf ? unaligned_load_u64a(hbuf + hlen - sizeof(u64a)) : (u64a)0 }; fdrUnpackState(fdr, &a, stream_state); diff --git a/src/fdr/fdr_autogen.py b/src/fdr/fdr_autogen.py deleted file mode 100755 index 748d811f2..000000000 --- a/src/fdr/fdr_autogen.py +++ /dev/null @@ -1,564 +0,0 @@ -#!/usr/bin/python - -# Copyright (c) 2015, Intel Corporation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of Intel Corporation nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import sys -from autogen_utils import * -from base_autogen import * -from string import Template - -class OrStep(Step): - def __init__(self, context, offset, width): - Step.__init__(self, context, offset) - s_var = self.gv("st%d" % offset) - if width < 128: - self.val = "s |= %s;" % s_var.name - else: - self.val = "s = or%d(s, %s);" % (width, s_var.name) - -class ShiftStateStep(Step): - def __init__(self, context, offset = 0, stride_used = 1): - Step.__init__(self, context, offset) - m = self.matcher - state = m.state_variable - shift_distance = -1 * stride_used * m.num_buckets - self.val = "%s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance)) - -class BulkLoadStep(Step): - def __init__(self, context, offset, size, define_var = True, aligned = True): - Step.__init__(self, context, offset) - m = self.matcher - self.latency = 4 - blt = m.bulk_load_type - if aligned: - init_string = blt.load_expr_data(self.offset, code = "aligned") - else: - init_string = blt.load_expr_data(self.offset) - - var_name = "current_data_%d" % offset - if define_var: - lb_var = self.nv(blt, var_name) - self.val = lb_var.gen_initializer_stmt(init_string) - else: - lb_var = self.gv(var_name, reader = False, writer = True) - self.val = "%s = %s;" % (var_name, init_string) - -class ValueExtractStep(Step): - def __init__(self, context, offset, sub_load_cautious = False): - Step.__init__(self, context, offset) - m = self.matcher - self.latency = 2 - dsb = m.datasize_bytes - modval = offset % dsb - - if modval == dsb - 1: - # Case 1: reading more than one byte over the end of the bulk load - - self.latency = 4 - if sub_load_cautious: - code_string = "cautious_forward" - else: - code_string = "normal" - load_string = m.single_load_type.load_expr_data(self.offset, code_string) - temp_string = "(%s << %d)" % (load_string, m.reach_shift_adjust) - else: - # Case 2: reading a value that can be found entirely in the current register - if m.fdr2_force_naive_load: - load_string = m.single_load_type.load_expr_data(self.offset, "normal") - temp_string = "(%s << %d)" % (load_string, m.reach_shift_adjust) - else: - lb_var = self.gv("current_data_%d" % (offset - modval)) - if modval == 0: - # Case 2a: value is at LSB end of the register and must be left- - # shifted into place if there is a "reach_shift_adjust" required - temp_string = "(%s << %d)" % (lb_var.name, m.reach_shift_adjust) - else: - # Case 2b: value is in the middle of the register and will be - # right-shifted into place (adjusted by "reach_shift_adjust") - temp_string = "(%s >> %d)" % (lb_var.name, modval*8 - m.reach_shift_adjust) - - - init_string = "(%s) & (domain_mask << %d)" % (temp_string, m.reach_shift_adjust) - v_var = self.nv(m.value_extract_type, "v%d" % offset) - self.val = v_var.gen_initializer_stmt(init_string) - -class TableLookupStep(Step): - def __init__(self, context, reach_multiplier, offset = 0): - Step.__init__(self, context, offset) - m = self.matcher - self.latency = 4 - v_var = self.gv("v%d" % offset) 
- s_var = self.nv(m.state_type, "st%d" % offset) - init_string = "*(const %s *)(ft + %s*%dU)" % ( m.state_type.get_name(), - v_var.name, reach_multiplier) - self.val = s_var.gen_initializer_stmt(init_string) - -class ShiftReachMaskStep(Step): - def __init__(self, context, offset): - Step.__init__(self, context, offset) - m = self.matcher - extr = m.extract_frequency - modval = offset % extr - s_var = self.gv("st%d" % offset, writer = True) - self.val = "%s = %s;" % (s_var.name, s_var.type.shift_expr(s_var.name, modval * m.num_buckets)) - -class ConfExtractStep(Step): - def __init__(self, context, offset): - Step.__init__(self, context, offset) - m = self.matcher - if m.state_type.isSIMDOnIntel(): - self.latency = 2 - init_string = m.state_type.lowbit_extract_expr("s", m.extract_size) - extr_var = self.nv(m.extr_type, "extr%d" % offset) - self.val = extr_var.gen_initializer_stmt(init_string) - -class ConfAccumulateStep(Step): - def __init__(self, context, extract_offset, conf_offset, define_var = True): - Step.__init__(self, context, extract_offset) - m = self.matcher - extr_var = self.gv("extr%d" % extract_offset) - extr_var_cast = "((%s)%s)" % (m.conf_type.get_name(), extr_var.name) - if extract_offset == conf_offset: - # create conf_var as a straight copy of extr - if define_var: - conf_var = self.nv(m.conf_type, "conf%d" % conf_offset) - self.val = conf_var.gen_initializer_stmt(extr_var_cast) - else: - conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True) - self.val = "%s = %s;" % (conf_var.name, extr_var_cast) - else: - # shift extr_var and insert/OR it in conf_var - conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True) - shift_dist = (extract_offset - conf_offset) * m.num_buckets - self.val = "%s |= %s;" % (conf_var.name, m.conf_type.shift_expr(extr_var_cast, shift_dist)) - self.latency = 2 - -class ConfirmFlipStep(Step): - def __init__(self, context, offset): - Step.__init__(self, context, offset) - m = self.matcher - conf_var = self.gv("conf%d" % self.offset, writer = True) - self.val = "%s = %s;" % (conf_var.name, - conf_var.type.flip_lowbits_expr(conf_var.name, self.matcher.confirm_frequency * m.num_buckets)) - -class ConfirmStep(Step): - def __init__(self, context, offset, cautious = False): - Step.__init__(self, context, offset) - m = self.matcher - conf_var = self.gv("conf%d" % offset, writer = True) - self.val = m.produce_confirm_base(conf_var.name, conf_var.type.size, offset, cautious, - enable_confirmless = m.stride == 1, do_bailout = False) - -class M3(MatcherBase): - def produce_compile_call(self): - print " { %d, %d, %d, %d, %s, %d, %d }," % ( - self.id, self.state_width, self.num_buckets, - self.stride, - self.arch.target, self.conf_pull_back, self.conf_top_level_split) - - def produce_main_loop(self, switch_variant = False): - stride_offsets = xrange(0, self.loop_bytes, self.stride) - stride_offsetSet = set(stride_offsets) - so_steps_last_block = [] - sh = None - last_confirm = None - ctxt = CodeGenContext(self) - - if switch_variant: - print " ptr -= (iterBytes - dist);" - print " { " # need an extra scope around switch variant to stop its globals escaping - else: - print " if (doMainLoop) {" - print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {" - print self.produce_flood_check() - print " __builtin_prefetch(ptr + (iterBytes*4));" - print " assert(((size_t)ptr % START_MOD) == 0);" - - - # just do globally for now - if switch_variant: - subsidiary_load_cautious = True - confirm_cautious = True - else: - 
subsidiary_load_cautious = False - confirm_cautious = False - - if not self.fdr2_force_naive_load: - bulk_load_steps = [ off for off in range(self.loop_bytes) - if off % self.datasize_bytes == 0 and - (set(range(off, off + self.datasize_bytes - 1)) & stride_offsetSet)] - else: - bulk_load_steps = [] - - confirm_steps = [ off for off in range(self.loop_bytes) if off % self.confirm_frequency == 0 ] - - for off in bulk_load_steps: - lb_var = ctxt.new_var(None, self.bulk_load_type, "current_data_%d" % off) - print " " + lb_var.gen_initializer_stmt() - - - for off in confirm_steps: - var_name = "conf%d" % off - conf_def_var = ctxt.new_var(None, self.conf_type, var_name) - if switch_variant: - init_string = "(%s)-1" % self.conf_type.get_name() - else: - init_string = "" - print " " + conf_def_var.gen_initializer_stmt(init_string) - - if switch_variant: - print " switch(iterBytes - dist) {" - for i in range(0, self.loop_bytes): - print " case %d:" % i - - # init and poison conf; over-precise but harmless - conf_id = (i / self.confirm_frequency) * self.confirm_frequency - if i % self.confirm_frequency: - conf_fixup_bits = self.conf_type.size - (self.num_buckets * (i % self.confirm_frequency)) - print " conf%d >>= %d;" % (conf_id, conf_fixup_bits) - else: - print " conf%d = 0;" % conf_id - - # init state - state_fixup = i % self.extract_frequency - state = self.state_variable - shift_distance = self.num_buckets * state_fixup - if state_fixup: - print " %s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance)) - if self.state_width < 128: - print " %s |= %s;" % (state.name, state.type.lowbit_mask(shift_distance)) - else: - print " %s = or%d(%s, %s);" % (state.name, self.state_width, state.name, state.type.lowbit_mask(shift_distance)) - - if not self.fdr2_force_naive_load: - # init current_data (could poison it in some cases) - load_mod = i % self.datasize_bytes - load_offset = i - load_mod - if load_mod: - # not coming in on an even boundary means having to do a load var - # actually, there are a bunch of things we can do on this bulk load - # to avoid having to be 'cautious_backwards' but I'm not completely - # sure they are good ideas - init_string = self.bulk_load_type.load_expr_data(load_offset, - code = "cautious_backward") - var_name = "current_data_%d" % load_offset - lb_var = ctxt.get_var(None, var_name, reader = False, writer = True) - print " %s = %s;" % (lb_var.name, init_string) - - print " goto off%d;" % i - print " case %d: goto skipSwitch;" % self.loop_bytes - print " }" - print " {" - - - for off in range(self.loop_bytes): - # X_mod is the offset we're up to relative to the last X operation - # X_offset is which of the last X operations matches this iteration - - if (switch_variant): - LabelStep(ctxt, off) - - if off in bulk_load_steps: - if not self.fdr2_force_naive_load: - BulkLoadStep(ctxt, off, self.datasize, define_var = False, aligned = not switch_variant) - - if off in stride_offsets: - if switch_variant: - OpenScopeStep(ctxt, off) - ValueExtractStep(ctxt, off, sub_load_cautious = subsidiary_load_cautious) - TableLookupStep(ctxt, self.reach_mult, off) - if off % self.extract_frequency: - ShiftReachMaskStep(ctxt, off) - so = OrStep(ctxt, off, self.state_width) - if switch_variant: - CloseScopeStep(ctxt, off) - if sh != None: - so.add_dependency(sh) - so_steps_last_block += [ so ] - - extract_mod = off % self.extract_frequency - extract_offset = off - extract_mod - extract_ready = extract_mod == self.extract_frequency - 1 - if extract_ready: - if 
switch_variant: - OpenScopeStep(ctxt, off) - ex = ConfExtractStep(ctxt, extract_offset) - ConfAccumulateStep(ctxt, extract_offset, confirm_offset, define_var = False) - for so_step in so_steps_last_block: - ex.add_dependency(so_step) - if switch_variant: - CloseScopeStep(ctxt, off) - so_steps_last_block = [] - sh = ShiftStateStep(ctxt, extract_offset, stride_used = self.extract_frequency) - sh.add_dependency(ex) - - confirm_mod = off % self.confirm_frequency - confirm_offset = off - confirm_mod - confirm_ready = confirm_mod == self.confirm_frequency - 1 - if confirm_ready: - cflip = ConfirmFlipStep(ctxt, confirm_offset) - cf = ConfirmStep(ctxt, confirm_offset, cautious = confirm_cautious ) - if last_confirm: - cf.add_dependency(last_confirm) - last_confirm = cf - - - if not switch_variant: - print ctxt.schedule([ last_confirm, sh ]) - else: - print ctxt.dontschedule([ last_confirm, sh ]) - - if switch_variant: - print "skipSwitch:;" - print " ptr += iterBytes;" - print " }" # close extra scope around switch variant - print " }" - - - def produce_init_state(self): - state = self.state_variable - s_type = self.state_type - shift_distance = -1 * self.num_buckets - shift_expr = "%s = %s" % (state.name, state.type.shift_expr(state.name, shift_distance)) - - s = Template(""" - $TYPENAME s; - if (a->len_history) { - u32 tmp = 0; - if (a->start_offset == 0) { - tmp = a->buf_history[a->len_history - 1]; - tmp |= (a->buf[0] << 8); - } else { - tmp = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len); - } - tmp &= fdr->domainMask; - s = *((const $TYPENAME *)ft + tmp); - $SHIFT_EXPR; - } else { - s = *(const $TYPENAME *)&fdr->start; - } -""").substitute(TYPENAME = s_type.get_name(), - ZERO_EXPR = s_type.zero_expression(), - SHIFT_EXPR = shift_expr) - return s - - def produce_code(self): - - loop_read_behind = 0 - loop_read_ahead = self.loop_bytes + 1 - - # we set up mask and shift stuff for extracting our masks from registers - # - # we have a choice as to whether to mask out the value early or - # extract the value (shift first) then mask it - # - # Intel has a free scaling factor from 1/2/4/8 so we want to combine - # the extra needed shift for SSE registers with the mask operation - - ssb = self.state_type.size / 8 # state size in bytes - - # Intel path - if ssb == 16: - # obscure corner - we don't have the room in the register to - # do this for all values so we don't. 
domain==16 is pretty - # bad anyhow, of course - self.reach_mult = 8 - else: - self.reach_mult = ssb - - shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 } - self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ] - - print self.produce_header(visible = False) - - print "// ", - print " Arch: " + self.arch.name, - print " State type: " + self.state_type.get_name(), - print " Num buckets: %d" % self.num_buckets, - print " Stride: %d" % self.stride - - print self.produce_common_declarations() - - print " assert(fdr->domain > 8 && fdr->domain < 16);" - print - print " u64a domain_mask = fdr->domainMask;" - print " const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));" - print " const u32 * confBase = (const u32 *)(ft + fdr->tabSize);" - print self.produce_init_state() - print " const size_t iterBytes = %d;" % self.loop_bytes - print " const size_t START_MOD = %d;" % self.datasize_bytes - print " const size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead - - print """ - while (ptr < buf + len) { - - u8 doMainLoop = 1; - size_t remaining = len - (ptr - buf); - size_t dist; - if (remaining <= iterBytes) { - dist = remaining; // once through the switch and we're done - } else if (remaining < 2 * iterBytes) { - // nibble some stuff off the front, skip the main loop, - // then come back here - dist = iterBytes; // maybe could be cleverer - } else { - // now, we need to see if we can make it to a main loop iteration - // if so, we need to ensure that the main loop iteration is aligned - // to a START_MOD boundary and i >= 8 so we can read ptr + i - 8 - - // see if we can do it - if not, just switch the main loop off, - // eat iterBytes in cautious mode, and come back to this loop - - const u8 * target = MAX(buf + 8, ptr); - target = ROUNDUP_PTR(target, START_MOD); - dist = target - ptr; - if (dist > iterBytes) { - doMainLoop = 0; - dist = iterBytes; - } - } -""" - self.produce_main_loop(switch_variant = True) - self.produce_main_loop(switch_variant = False) - print """ - } -""" - print self.produce_footer() - - def get_name(self): - return "fdr_exec_%s_s%d_w%d" % (self.arch.name, self.stride, self.state_width) - - def __init__(self, state_width, stride, - arch, - table_state_width = None, - num_buckets = 8, - extract_frequency = None, - confirm_frequency = None): - - # First - set up the values that are fundamental to how this matcher will operate - self.arch = arch - - # get the width of the state width on which we operate internally - if state_width not in [ 128 ]: - fail_out("Unknown state width: %d" % state_width) - self.state_width = state_width - self.state_type = getRequiredType(self.state_width) - self.state_variable = IntegerVariable("s", self.state_type) - - table_state_width = state_width - self.table_state_width = state_width - self.table_state_type = getRequiredType(self.table_state_width) - - # this is the load type required for domain [9:15] if we want to - # load it one at a time - self.single_load_type = IntegerType(16) - - # stride is the frequency with which we make data-driven - # accesses to our reach table - if stride not in [ 1, 2, 4, 8]: - fail_out("Unsupported stride: %d" % stride) - if stride * num_buckets > state_width: - fail_out("Stride %d is too big for the number of buckets %d given state width %d\n" % (stride, num_buckets, state_width)) - self.stride = stride - - if num_buckets != 8: - fail_out("Unsupported number of buckets: %d" % num_buckets) - if state_width % num_buckets and state_width == 128: - fail_out("Bucket scheme requires bit-shifts on m128 
(failing)") - self.num_buckets = num_buckets - - # Second - set up derived or optimization values - these can be - # overridden by arguments that are passed in - - self.datasize = 64 - self.bulk_load_type = IntegerType(self.datasize) - self.datasize_bytes = self.datasize/8 - - self.value_extract_type = IntegerType(self.datasize) - - self.fdr2_force_naive_load = False # disable everywhere for trunk - - # extract frequency is how frequently (in bytes) we destructively shift - # our state value after having pulled out that many bytes into a - # confirm register (of one sort or another). - # none means a default value - datasize, our biggest easily available GPR - if extract_frequency is None: - extract_frequency = self.datasize_bytes - self.extract_frequency = extract_frequency - self.extract_size = self.extract_frequency*self.num_buckets - if extract_frequency < stride: - fail_out("Can't extract at extract frequency %d with stride %d" % (extract_frequency, stride)) - if extract_frequency not in [ None, 1, 2, 4, 8, 16]: - fail_out("Weird extract frequency: %d" % extract_frequency) - - if self.extract_size <= 32: - self.extr_type = IntegerType(32) - elif self.extract_size <= 64: - self.extr_type = IntegerType(64) - else: - fail_out("Implausible size %d required for confirm extract step" % size) - - # extract_frequency is how often we pull out our state and place - # it somewhere in a lossless fashion - # confirm_frequency, on the other hand, is how frequently we - # take the state extracted by extract_frequency and cobble it - # together into a matching loop - # confirm_frequency must be a multiple of extract_frequency - # and must fit into a fast register; for now; we're going to - # stay in the GPR domain - if confirm_frequency is None: - confirm_frequency = self.extract_frequency - self.confirm_frequency = confirm_frequency - if confirm_frequency % self.extract_frequency: - fail_out("Confirm frequency %d must be evenly divisible by extract_frequency %d" % (confirm_frequency, self.extract_frequency)) - - self.conf_size = self.confirm_frequency * self.num_buckets - if self.conf_size <= 32: - self.conf_type = IntegerType(32) - elif self.conf_size <= 64: - self.conf_type = IntegerType(64) - else: - fail_out("Implausible size %d required for confirm accumulate step" % self.conf_size) - - # how many bytes in flight at once - self.loop_bytes = 16 - - # confirm configuration - - # how many entries in the top-level confirm table - 256 means - # complete split on the last character - self.conf_top_level_split = 256 - - # how much we 'pull back' in confirm - this is obviously related - # to the first level conf but we will keep two separate paramters - # for this to avoid the risk of conflating these - self.conf_pull_back = 1 - - if self.conf_pull_back > 0 and self.conf_top_level_split < 256: - fail_out("Pull back distance %d not supported by top level split %d" % (self.conf_pull_back, self.conf_top_level_split)) - - # minor stuff - self.default_body_indent = 8 diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index ccf177f0d..0c4ef35d0 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -187,9 +187,9 @@ aligned_unique_ptr FDRCompiler::setupFDR(pair link) { /* we are allowing domains 9 to 15 only */ assert(eng.bits > 8 && eng.bits < 16); fdr->domain = eng.bits; - fdr->schemeWidthByte = eng.schemeWidth / 8; fdr->domainMask = (1 << eng.bits) - 1; - fdr->tabSize = (1 << eng.bits) * fdr->schemeWidthByte; + fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8); + fdr->stride 
= eng.stride; if (link.first) { fdr->link = verify_u32(ptr - fdr_base); @@ -544,6 +544,7 @@ fdrBuildTableInternal(const vector &lits, bool make_small, // temporary hack for unit testing if (hint != HINT_INVALID) { des->bits = 9; + des->stride = 1; } FDRCompiler fc(lits, *des, make_small); diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index a77a8b898..9b1df5932 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,144 +36,121 @@ #include "util/bitutils.h" #include "util/compare.h" -#define CONF_LOADVAL_CALL lv_u64a -#define CONF_LOADVAL_CALL_CAUTIOUS lv_u64a_ce - // this is ordinary confirmation function which runs through // the whole confirmation procedure static really_inline -void confWithBit(const struct FDRConfirm * fdrc, - const struct FDR_Runtime_Args * a, - size_t i, - CautionReason r, - u32 pullBackAmount, - hwlmcb_rv_t *control, - u32 * last_match) { +void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a, + size_t i, u32 pullBackAmount, hwlmcb_rv_t *control, + u32 *last_match, u64a conf_key) { assert(i < a->len); assert(ISALIGNED(fdrc)); const u8 * buf = a->buf; - const size_t len = a->len; - - CONF_TYPE v; - const u8 * confirm_loc = buf + i - pullBackAmount - 7; - if (likely(r == NOT_CAUTIOUS || confirm_loc >= buf)) { - v = CONF_LOADVAL_CALL(confirm_loc, buf, buf + len); - } else { // r == VECTORING, confirm_loc < buf - u64a histBytes = a->histBytes; - v = CONF_LOADVAL_CALL_CAUTIOUS(confirm_loc, buf, buf + len); - // stitch together v (which doesn't move) and history (which does) - u32 overhang = buf - confirm_loc; - histBytes >>= 64 - (overhang * 8); - v |= histBytes; + u32 c = CONF_HASH_CALL(conf_key, fdrc->andmsk, fdrc->mult, + fdrc->nBitsOrSoleID); + u32 start = getConfirmLitIndex(fdrc)[c]; + if (likely(!start)) { + return; } - u32 c = CONF_HASH_CALL(v, fdrc->andmsk, fdrc->mult, fdrc->nBitsOrSoleID); - u32 start = getConfirmLitIndex(fdrc)[c]; - if (P0(start)) { - const struct LitInfo *l = - (const struct LitInfo *)((const u8 *)fdrc + start); + const struct LitInfo *li + = (const struct LitInfo *)((const u8 *)fdrc + start); - u8 oldNext; // initialized in loop - do { - assert(ISALIGNED(l)); + u8 oldNext; // initialized in loop + do { + assert(ISALIGNED(li)); - if (P0( (v & l->msk) != l->v)) { - goto out; - } + if (unlikely((conf_key & li->msk) != li->v)) { + goto out; + } - if ((*last_match == l->id) && (l->flags & NoRepeat)) { - goto out; - } + if ((*last_match == li->id) && (li->flags & NoRepeat)) { + goto out; + } - const u8 * loc = buf + i - l->size + 1 - pullBackAmount; + const u8 *loc = buf + i - li->size + 1 - pullBackAmount; - u8 caseless = l->flags & Caseless; - if (loc < buf) { - u32 full_overhang = buf - loc; + u8 caseless = li->flags & Caseless; + if (loc < buf) { + u32 full_overhang = buf - loc; - const u8 * history = (caseless) ? - a->buf_history_nocase : a->buf_history; - size_t len_history = (caseless) ? - a->len_history_nocase : a->len_history; + const u8 *history = caseless ? a->buf_history_nocase + : a->buf_history; + size_t len_history = caseless ? 
a->len_history_nocase + : a->len_history; - // can't do a vectored confirm either if we don't have - // the bytes - if (full_overhang > len_history) { + // can't do a vectored confirm either if we don't have + // the bytes + if (full_overhang > len_history) { + goto out; + } + + // as for the regular case, no need to do a full confirm if + // we're a short literal + if (unlikely(li->size > sizeof(CONF_TYPE))) { + const u8 *s1 = li->s; + const u8 *s2 = s1 + full_overhang; + const u8 *loc1 = history + len_history - full_overhang; + const u8 *loc2 = buf; + size_t size1 = MIN(full_overhang, li->size - sizeof(CONF_TYPE)); + size_t wind_size2_back = sizeof(CONF_TYPE) + full_overhang; + size_t size2 = wind_size2_back > li->size ? + 0 : li->size - wind_size2_back; + + if (cmpForward(loc1, s1, size1, caseless)) { goto out; } - - // as for the regular case, no need to do a full confirm if - // we're a short literal - if (unlikely(l->size > sizeof(CONF_TYPE))) { - const u8 * s1 = l->s; - const u8 * s2 = s1 + full_overhang; - const u8 * loc1 = history + len_history - full_overhang; - const u8 * loc2 = buf; - size_t size1 = MIN(full_overhang, - l->size - sizeof(CONF_TYPE)); - size_t wind_size2_back = sizeof(CONF_TYPE) + - full_overhang; - size_t size2 = wind_size2_back > l->size ? - 0 : l->size - wind_size2_back; - - if (cmpForward(loc1, s1, size1, caseless)) { - goto out; - } - if (cmpForward(loc2, s2, size2, caseless)) { - goto out; - } + if (cmpForward(loc2, s2, size2, caseless)) { + goto out; } - } else { // NON-VECTORING PATH + } + } else { // NON-VECTORING PATH - // if string < conf_type we don't need regular string cmp - if (unlikely(l->size > sizeof(CONF_TYPE))) { - if (cmpForward(loc, l->s, l->size - sizeof(CONF_TYPE), caseless)) { - goto out; - } + // if string < conf_type we don't need regular string cmp + if (unlikely(li->size > sizeof(CONF_TYPE))) { + if (cmpForward(loc, li->s, li->size - sizeof(CONF_TYPE), + caseless)) { + goto out; } } + } - if (P0(!(l->groups & *control))) { - goto out; - } + if (unlikely(!(li->groups & *control))) { + goto out; + } - if (unlikely(l->flags & ComplexConfirm)) { - const u8 * loc2 = buf + i - l->extended_size + 1 - pullBackAmount; - if (loc2 < buf) { - u32 full_overhang = buf - loc2; - size_t len_history = (caseless) ? - a->len_history_nocase : a->len_history; - if (full_overhang > len_history) { - goto out; - } + if (unlikely(li->flags & ComplexConfirm)) { + const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount; + if (loc2 < buf) { + u32 full_overhang = buf - loc2; + size_t len_history = caseless ? 
a->len_history_nocase + : a->len_history; + if (full_overhang > len_history) { + goto out; } } + } - *last_match = l->id; - *control = a->cb(loc - buf, i, l->id, a->ctxt); -out: - oldNext = l->next; // oldNext is either 0 or an 'adjust' value - l = (const struct LitInfo*)((const u8 *)l + oldNext + l->size); - } while (oldNext); - } + *last_match = li->id; + *control = a->cb(loc - buf, i, li->id, a->ctxt); + out: + oldNext = li->next; // oldNext is either 0 or an 'adjust' value + li = (const struct LitInfo *)((const u8 *)li + oldNext + li->size); + } while (oldNext); } // 'light-weight' confirmation function which is used by 1-mask Teddy; // in the 'confirmless' case it simply calls callback function, // otherwise it calls 'confWithBit' function for the full confirmation procedure static really_inline -void confWithBit1(const struct FDRConfirm * fdrc, - const struct FDR_Runtime_Args * a, - size_t i, - CautionReason r, - hwlmcb_rv_t *control, - u32 * last_match) { +void confWithBit1(const struct FDRConfirm *fdrc, + const struct FDR_Runtime_Args *a, size_t i, + hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) { assert(i < a->len); assert(ISALIGNED(fdrc)); if (unlikely(fdrc->mult)) { - confWithBit(fdrc, a, i, r, 0, control, last_match); + confWithBit(fdrc, a, i, 0, control, last_match, conf_key); return; } else { u32 id = fdrc->nBitsOrSoleID; @@ -190,12 +167,9 @@ void confWithBit1(const struct FDRConfirm * fdrc, // In the 'confirmless' case it makes fast 32-bit comparison, // otherwise it calls 'confWithBit' function for the full confirmation procedure static really_inline -void confWithBitMany(const struct FDRConfirm * fdrc, - const struct FDR_Runtime_Args * a, - size_t i, - CautionReason r, - hwlmcb_rv_t *control, - u32 * last_match) { +void confWithBitMany(const struct FDRConfirm *fdrc, + const struct FDR_Runtime_Args *a, size_t i, CautionReason r, + hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) { assert(i < a->len); assert(ISALIGNED(fdrc)); @@ -204,7 +178,7 @@ void confWithBitMany(const struct FDRConfirm * fdrc, } if (unlikely(fdrc->mult)) { - confWithBit(fdrc, a, i, r, 0, control, last_match); + confWithBit(fdrc, a, i, 0, control, last_match, conf_key); return; } else { const u32 id = fdrc->nBitsOrSoleID; @@ -215,7 +189,7 @@ void confWithBitMany(const struct FDRConfirm * fdrc, } if (r == VECTORING && len > i - a->start_offset) { - if (len > (i + a->len_history)) { + if (len > i + a->len_history) { return; } diff --git a/src/fdr/fdr_dump.cpp b/src/fdr/fdr_dump.cpp index a141f3882..7e794bb3a 100644 --- a/src/fdr/fdr_dump.cpp +++ b/src/fdr/fdr_dump.cpp @@ -68,8 +68,7 @@ void fdrPrintStats(const FDR *fdr, FILE *f) { } if (isTeddy) { - unique_ptr des = - getTeddyDescription(fdr->engineID); + auto des = getTeddyDescription(fdr->engineID); if (des) { fprintf(f, " masks %u\n", des->numMasks); fprintf(f, " buckets %u\n", des->getNumBuckets()); @@ -78,16 +77,8 @@ void fdrPrintStats(const FDR *fdr, FILE *f) { fprintf(f, " \n"); } } else { - unique_ptr des = - getFdrDescription(fdr->engineID); - if (des) { - fprintf(f, " domain %u\n", des->bits); - fprintf(f, " stride %u\n", des->stride); - fprintf(f, " buckets %u\n", des->getNumBuckets()); - fprintf(f, " width %u\n", des->schemeWidth); - } else { - fprintf(f, " \n"); - } + fprintf(f, " domain %u\n", fdr->domain); + fprintf(f, " stride %u\n", fdr->stride); } fprintf(f, " strings ???\n"); diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp index 5d470c7e2..103bc2146 100644 --- 
a/src/fdr/fdr_engine_description.cpp +++ b/src/fdr/fdr_engine_description.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,13 +42,11 @@ using namespace std; namespace ue2 { -#include "fdr_autogen_compiler.cpp" - FDREngineDescription::FDREngineDescription(const FDREngineDef &def) : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), def.numBuckets, def.confirmPullBackDistance, def.confirmTopLevelSplit), - schemeWidth(def.schemeWidth), stride(def.stride), bits(0) {} + schemeWidth(def.schemeWidth), stride(0), bits(0) {} u32 FDREngineDescription::getDefaultFloodSuffixLength() const { // rounding up, so that scheme width 32 and 6 buckets is 6 not 5! @@ -56,6 +54,12 @@ u32 FDREngineDescription::getDefaultFloodSuffixLength() const { return ((getSchemeWidth() + getNumBuckets() - 1) / getNumBuckets()) + 1; } +void getFdrDescriptions(vector *out) { + static const FDREngineDef def = {0, 128, 8, 0, 1, 256}; + out->clear(); + out->push_back(FDREngineDescription(def)); +} + static u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) { u32 desiredStride = 1; // always our safe fallback @@ -108,32 +112,33 @@ unique_ptr chooseEngine(const target_t &target, FDREngineDescription *best = nullptr; u32 best_score = 0; + FDREngineDescription &eng = allDescs[0]; + for (u32 domain = 9; domain <= 15; domain++) { - for (size_t engineID = 0; engineID < allDescs.size(); engineID++) { + for (size_t stride = 1; stride <= 4; stride *= 2) { // to make sure that domains >=14 have stride 1 according to origin - if (domain > 13 && engineID > 0) { + if (domain > 13 && stride > 1) { continue; } - FDREngineDescription &eng = allDescs[engineID]; if (!eng.isValidOnTarget(target)) { continue; } - if (msl < eng.stride) { + if (msl < stride) { continue; } u32 score = 100; - score -= absdiff(desiredStride, eng.stride); + score -= absdiff(desiredStride, stride); - if (eng.stride <= desiredStride) { - score += eng.stride; + if (stride <= desiredStride) { + score += stride; } u32 effLits = vl.size(); /* * desiredStride;*/ u32 ideal; if (effLits < eng.getNumBuckets()) { - if (eng.stride == 1) { + if (stride == 1) { ideal = 8; } else { ideal = 10; @@ -158,27 +163,28 @@ unique_ptr chooseEngine(const target_t &target, ideal -= 2; } - if (eng.stride > 1) { + if (stride > 1) { ideal++; } DEBUG_PRINTF("effLits %u\n", effLits); if (target.is_atom_class() && !make_small && effLits < 4000) { - /* Unless it is a very heavy case, we want to build smaller tables - * on lightweight machines due to their small caches. */ + /* Unless it is a very heavy case, we want to build smaller + * tables on lightweight machines due to their small caches. 
*/ ideal -= 2; } score -= absdiff(ideal, domain); - DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u " + DEBUG_PRINTF("fdr %u: width=%u, domain=%u, buckets=%u, stride=%zu " "-> score=%u\n", - eng.getID(), eng.schemeWidth, eng.bits, - eng.getNumBuckets(), eng.stride, score); + eng.getID(), eng.schemeWidth, domain, + eng.getNumBuckets(), stride, score); if (!best || score > best_score) { eng.bits = domain; + eng.stride = stride; best = &eng; best_score = score; } diff --git a/src/fdr/fdr_engine_description.h b/src/fdr/fdr_engine_description.h index 45f64ac0f..d4e70d4b1 100644 --- a/src/fdr/fdr_engine_description.h +++ b/src/fdr/fdr_engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,7 +42,6 @@ struct FDREngineDef { u32 id; u32 schemeWidth; u32 numBuckets; - u32 stride; u64a cpu_features; u32 confirmPullBackDistance; u32 confirmTopLevelSplit; @@ -73,7 +72,6 @@ chooseEngine(const target_t &target, const std::vector &vl, bool make_small); std::unique_ptr getFdrDescription(u32 engineID); void getFdrDescriptions(std::vector *out); - } // namespace ue2 #endif diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 607e039c8..cde13f6c1 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,17 +76,17 @@ struct FDR { * structures (spillover strings and hash table) if we're a secondary * structure. */ u32 link; - u8 domain; /* dynamic domain info */ - u8 schemeWidthByte; /* scheme width in bytes */ + u8 stride; /* stride - how frequently the data is consulted by the first + * stage matcher */ + u8 domain; /* number of bits used to index into main FDR table. This value + * is used only for debugging/asserts. */ u16 domainMask; /* pre-computed domain mask */ u32 tabSize; /* pre-computed hashtable size in bytes */ - u32 pad1; + u32 pad; - union { - u32 s_u32; - u64a s_u64a; - m128 s_m128; - } start; + m128 start; /* initial start state to use at offset 0. The state has been set + * up based on the min length of buckets to reduce the need for + * pointless confirms. */ }; /** \brief FDR runtime arguments.
diff --git a/src/fdr/fdr_loadval.h b/src/fdr/fdr_loadval.h index 95e8981ff..37baf823e 100644 --- a/src/fdr/fdr_loadval.h +++ b/src/fdr/fdr_loadval.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,12 @@ #define MAKE_LOADVAL(type, name) \ static really_inline type name (const u8 * ptr, UNUSED const u8 * lo, UNUSED const u8 * hi) -#define NORMAL_SAFE(type) assert(ptr >= lo && (ptr + sizeof(type) - 1) < hi) +#define NORMAL_SAFE(type) \ + do { \ + assert(ptr >= lo); \ + assert(ptr + sizeof(type) - 1 < hi); \ + } while(0) + #define ALIGNED_SAFE(type) NORMAL_SAFE(type); assert(((size_t)ptr % sizeof(type)) == 0); // these ones need asserts to test the property that we're not handling dynamically #define CAUTIOUS_FORWARD_SAFE(type) assert(ptr >= lo) diff --git a/src/fdr/teddy_autogen.py b/src/fdr/teddy_autogen.py index 210501100..1cada00c6 100755 --- a/src/fdr/teddy_autogen.py +++ b/src/fdr/teddy_autogen.py @@ -1,6 +1,6 @@ #!/usr/bin/python -# Copyright (c) 2015, Intel Corporation +# Copyright (c) 2015-2016, Intel Corporation # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -27,19 +27,110 @@ import sys from autogen_utils import * -from base_autogen import * from string import Template -class MT(MatcherBase): +class MT: + def produce_header(self, visible, header_only = False): + s = "" + if not visible: + s += "static never_inline" + s += """ +hwlm_error_t %s(UNUSED const struct FDR *fdr, + UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name() + if header_only: + s += ";" + else: + s += "{" + s += "\n" + return s + + def produce_guard(self): + print self.arch.get_guard() + + def produce_zero_alternative(self): + print """ +#else +#define %s 0 +#endif +""" % self.get_name() + + def close_guard(self): + print "#endif" + + def produce_confirm_base(self, conf_var_name, conf_var_size, offset, cautious, enable_confirmless, do_bailout = False): + if cautious: + caution_string = "VECTORING" + else: + caution_string = "NOT_CAUTIOUS" + conf_split_mask = IntegerType(32).constant_to_string( + self.conf_top_level_split - 1) + if enable_confirmless: + quick_check_string = """ + if (!fdrc->mult) { + u32 id = fdrc->nBitsOrSoleID; + if ((last_match == id) && (fdrc->flags & NoRepeat)) + continue; + last_match = id; + controlVal = a->cb(ptr+byte-buf, ptr+byte-buf, id, a->ctxt); + continue; + } """ + else: + quick_check_string = "" + if do_bailout: + bailout_string = """ + if ((ptr + byte < buf + a->start_offset) || (ptr + byte >= buf + len)) continue;""" + else: + bailout_string = "" + + return Template(""" +if (P0(!!$CONFVAR)) { + do { + u32 bit = findAndClearLSB_$CONFVAR_SIZE(&$CONFVAR); + u32 byte = bit / $NUM_BUCKETS + $OFFSET; + u32 bitRem = bit % $NUM_BUCKETS; + $BAILOUT_STRING + u32 confSplit = *(ptr+byte) & $SPLIT_MASK; + u32 idx = confSplit * $NUM_BUCKETS + bitRem; + u32 cf = confBase[idx]; + if (!cf) + continue; + fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf); + if (!(fdrc->groups & *control)) + continue; + $QUICK_CHECK_STRING + CautionReason reason = $CAUTION_STRING; + CONF_TYPE v; + const u8 * confirm_loc = ptr + byte - $CONF_PULL_BACK - 7; + if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { + v = lv_u64a(confirm_loc, buf, buf + len); + } else { // r 
== VECTORING, confirm_loc < buf + u64a histBytes = a->histBytes; + v = lv_u64a_ce(confirm_loc, buf, buf + len); + // stitch together v (which doesn't move) and history (which does) + u32 overhang = buf - confirm_loc; + histBytes >>= 64 - (overhang * 8); + v |= histBytes; + } + confWithBit(fdrc, a, ptr - buf + byte, $CONF_PULL_BACK, control, &last_match, v); + } while(P0(!!$CONFVAR)); + if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { + *a->groups = controlVal; + return HWLM_TERMINATED; + } +}""").substitute(CONFVAR = conf_var_name, + CONFVAR_SIZE = conf_var_size, + NUM_BUCKETS = self.num_buckets, + OFFSET = offset, + SPLIT_MASK = conf_split_mask, + QUICK_CHECK_STRING = quick_check_string, + BAILOUT_STRING = bailout_string, + CAUTION_STRING = caution_string, + CONF_PULL_BACK = self.conf_pull_back) + def produce_confirm(self, iter, var_name, offset, bits, cautious = True): if self.packed: print self.produce_confirm_base(var_name, bits, iter*16 + offset, cautious, enable_confirmless = False, do_bailout = False) else: - if self.num_masks == 1: - conf_func = "confWithBit1" - else: - conf_func = "confWithBitMany" - if cautious: caution_string = "VECTORING" else: @@ -48,16 +139,33 @@ def produce_confirm(self, iter, var_name, offset, bits, cautious = True): print " if (P0(!!%s)) {" % var_name print " do {" if bits == 64: - print " bit = findAndClearLSB_64(&%s);" % (var_name) + print " u32 bit = findAndClearLSB_64(&%s);" % (var_name) else: - print " bit = findAndClearLSB_32(&%s);" % (var_name) - print " byte = bit / %d + %d;" % (self.num_buckets, iter*16 + offset) - print " idx = bit %% %d;" % self.num_buckets - print " cf = confBase[idx];" + print " u32 bit = findAndClearLSB_32(&%s);" % (var_name) + print " u32 byte = bit / %d + %d;" % (self.num_buckets, iter*16 + offset) + print " u32 idx = bit %% %d;" % self.num_buckets + print " u32 cf = confBase[idx];" print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);" print " if (!(fdrc->groups & *control))" print " continue;" - print " %s(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % (conf_func, caution_string) + print """ + CautionReason reason = %s; + CONF_TYPE v; + const u8 * confirm_loc = ptr + byte - 7; + if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { + v = lv_u64a(confirm_loc, buf, buf + len); + } else { // r == VECTORING, confirm_loc < buf + u64a histBytes = a->histBytes; + v = lv_u64a_ce(confirm_loc, buf, buf + len); + // stitch together v (which doesn't move) and history (which does) + u32 overhang = buf - confirm_loc; + histBytes >>= 64 - (overhang * 8); + v |= histBytes; + }""" % (caution_string) + if self.num_masks == 1: + print " confWithBit1(fdrc, a, ptr - buf + byte, control, &last_match, v);" + else: + print " confWithBitMany(fdrc, a, ptr - buf + byte, %s, control, &last_match, v);" % (caution_string) print " } while(P0(!!%s));" % var_name print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {" print " *a->groups = controlVal;" @@ -146,7 +254,17 @@ def produce_one_iteration(self, iter, effective_num_iterations, cautious = False def produce_code(self): print self.produce_header(visible = True, header_only = False) - print self.produce_common_declarations() + print """ + const u8 * buf = a->buf; + const size_t len = a->len; + const u8 * ptr = buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t * control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 * tryFloodDetect = a->firstFloodDetect; + const struct FDRConfirm *fdrc; + u32 last_match = (u32)-1; +""" 
print self.produce_needed_temporaries(self.num_iterations) @@ -179,10 +297,17 @@ def produce_code(self): print " ptr += 16;" print " }" - print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {" - print " __builtin_prefetch(ptr + (iterBytes*4));" - print self.produce_flood_check() - + print """ + for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + if (P0(ptr > tryFloodDetect)) { + tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes); + if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { + *a->groups = controlVal; + return HWLM_TERMINATED; + } + } +""" for iter in range(self.num_iterations): self.produce_one_iteration(iter, self.num_iterations, cautious = False, confirmCautious = False) @@ -192,7 +317,11 @@ def produce_code(self): self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True) print " }" - print self.produce_footer() + print """ + *a->groups = controlVal; + return HWLM_SUCCESS; +} +""" def produce_compile_call(self): packed_str = { False : "false", True : "true"}[self.packed] @@ -256,7 +385,17 @@ def produce_needed_temporaries(self, max_iterations): def produce_code(self): print self.produce_header(visible = True, header_only = False) - print self.produce_common_declarations() + print """ + const u8 * buf = a->buf; + const size_t len = a->len; + const u8 * ptr = buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t * control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 * tryFloodDetect = a->firstFloodDetect; + const struct FDRConfirm *fdrc; + u32 last_match = (u32)-1; +""" print self.produce_needed_temporaries(self.num_iterations) @@ -289,9 +428,17 @@ def produce_code(self): print " ptr += 16;" print " }" - print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {" - print " __builtin_prefetch(ptr + (iterBytes*4));" - print self.produce_flood_check() + print """ + for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + if (P0(ptr > tryFloodDetect)) { + tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes); + if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { + *a->groups = controlVal; + return HWLM_TERMINATED; + } + } +""" for iter in range(self.num_iterations): self.produce_one_iteration(iter, self.num_iterations, False, confirmCautious = False) @@ -302,7 +449,11 @@ def produce_code(self): self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True) print " }" - print self.produce_footer() + print """ + *a->groups = controlVal; + return HWLM_SUCCESS; +} +""" def produce_one_iteration_state_calc(self, iter, effective_num_iterations, cautious, save_old): @@ -367,7 +518,33 @@ def produce_one_iteration_confirm(self, iter, confirmCautious): print "#endif" print " }" -class MTFast(MatcherBase): +class MTFast: + def produce_header(self, visible, header_only = False): + s = "" + if not visible: + s += "static never_inline" + s += """ +hwlm_error_t %s(UNUSED const struct FDR *fdr, + UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name() + if header_only: + s += ";" + else: + s += "{" + s += "\n" + return s + + def produce_guard(self): + print self.arch.get_guard() + + def produce_zero_alternative(self): + print """ +#else +#define %s 0 +#endif +""" % self.get_name() + + def close_guard(self): + print "#endif" def produce_confirm(self, cautious): if cautious: @@ -376,24 +553,52 
@@ def produce_confirm(self, cautious): cautious_str = "NOT_CAUTIOUS" print " for (u32 i = 0; i < arrCnt; i++) {" - print " byte = bitArr[i] / 8;" + print " u32 byte = bitArr[i] / 8;" if self.packed: conf_split_mask = IntegerType(32).constant_to_string( self.conf_top_level_split - 1) - print " bitRem = bitArr[i] % 8;" - print " confSplit = *(ptr+byte) & 0x1f;" - print " idx = confSplit * %d + bitRem;" % self.num_buckets - print " cf = confBase[idx];" + print " u32 bitRem = bitArr[i] % 8;" + print " u32 confSplit = *(ptr+byte) & 0x1f;" + print " u32 idx = confSplit * %d + bitRem;" % self.num_buckets + print " u32 cf = confBase[idx];" print " if (!cf)" print " continue;" print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);" print " if (!(fdrc->groups & *control))" print " continue;" - print " confWithBit(fdrc, a, ptr - buf + byte, %s, 0, control, &last_match);" % cautious_str + print """ + CautionReason reason = %s; + CONF_TYPE v; + const u8 * confirm_loc = ptr + byte - 7; + if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { + v = lv_u64a(confirm_loc, buf, buf + len); + } else { // r == VECTORING, confirm_loc < buf + u64a histBytes = a->histBytes; + v = lv_u64a_ce(confirm_loc, buf, buf + len); + // stitch together v (which doesn't move) and history (which does) + u32 overhang = buf - confirm_loc; + histBytes >>= 64 - (overhang * 8); + v |= histBytes; + }""" % (cautious_str) + print " confWithBit(fdrc, a, ptr - buf + byte, 0, control, &last_match, v);" else: - print " cf = confBase[bitArr[i] % 8];" + print " u32 cf = confBase[bitArr[i] % 8];" print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);" - print " confWithBit1(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % cautious_str + print """ + CautionReason reason = %s; + CONF_TYPE v; + const u8 * confirm_loc = ptr + byte - 7; + if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { + v = lv_u64a(confirm_loc, buf, buf + len); + } else { // r == VECTORING, confirm_loc < buf + u64a histBytes = a->histBytes; + v = lv_u64a_ce(confirm_loc, buf, buf + len); + // stitch together v (which doesn't move) and history (which does) + u32 overhang = buf - confirm_loc; + histBytes >>= 64 - (overhang * 8); + v |= histBytes; + }""" % (cautious_str) + print " confWithBit1(fdrc, a, ptr - buf + byte, control, &last_match, v);" print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {" print " *a->groups = controlVal;" print " return HWLM_TERMINATED;" @@ -467,7 +672,17 @@ def produce_one_iteration_state_calc(self, iter, cautious): def produce_code(self): print self.produce_header(visible = True, header_only = False) - print self.produce_common_declarations() + print """ + const u8 * buf = a->buf; + const size_t len = a->len; + const u8 * ptr = buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t * control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 * tryFloodDetect = a->firstFloodDetect; + const struct FDRConfirm *fdrc; + u32 last_match = (u32)-1; +""" print self.produce_needed_temporaries(self.num_iterations) @@ -498,9 +713,18 @@ def produce_code(self): self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True) print " ptr += 32;" print " }" - print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {" - print " __builtin_prefetch(ptr + (iterBytes*4));" - print self.produce_flood_check() + print """ + for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + if (P0(ptr > tryFloodDetect)) { + 
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes); + if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { + *a->groups = controlVal; + return HWLM_TERMINATED; + } + } +""" + for iter in range (0, self.num_iterations): self.produce_one_iteration_state_calc(iter = iter, cautious = False) print " arrCnt = 0;" @@ -514,7 +738,11 @@ def produce_code(self): self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True) print " }" - print self.produce_footer() + print """ + *a->groups = controlVal; + return HWLM_SUCCESS; +} +""" def get_name(self): if self.packed: diff --git a/src/runtime.c b/src/runtime.c index cab612271..852eaf929 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -450,11 +450,19 @@ void maintainHistoryBuffer(const struct RoseEngine *rose, char *state, static really_inline void init_stream(struct hs_stream *s, const struct RoseEngine *rose) { + char *state = getMultiState(s); + + // Make absolutely sure that the 16 bytes leading up to the end of the + // history buffer are initialised, as we rely on this (regardless of the + // actual values used) in FDR. + char *hist_end = state + rose->stateOffsets.history + rose->historyRequired; + assert(hist_end - 16 >= (const char *)s); + unaligned_store_u64a(hist_end - 16, 0xDEADDEADDEADDEADull); + unaligned_store_u64a(hist_end - 8, 0xDEADDEADDEADDEADull); + s->rose = rose; s->offset = 0; - char *state = getMultiState(s); - setStreamStatus(state, 0); roseInitState(rose, state); diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index bda8c6248..68d8f6324 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -403,8 +403,11 @@ TEST_P(FDRFloodp, WithMask) { TEST_P(FDRFloodp, StreamingMask) { const u32 hint = GetParam(); SCOPED_TRACE(hint); + const size_t fake_history_size = 16; + const vector fake_history(fake_history_size, 0); const size_t dataSize = 1024; vector data(dataSize); + vector tempdata(dataSize + fake_history_size); // headroom u8 c = '\0'; while (1) { @@ -487,18 +490,28 @@ TEST_P(FDRFloodp, StreamingMask) { for (u32 streamChunk = 1; streamChunk <= 16; streamChunk *= 2) { matchesCounts.clear(); - fdrStatus = fdrExecStreaming(fdr.get(), nullptr, 0, &data[0], streamChunk, - 0, countCallback, &matchesCounts, HWLM_ALL_GROUPS, nullptr); + const u8 *d = data.data(); + // reference past the end of fake history to allow headroom + const u8 *fhist = fake_history.data() + fake_history_size; + fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0, + countCallback, &matchesCounts, + HWLM_ALL_GROUPS, nullptr); ASSERT_EQ(0, fdrStatus); for (u32 j = streamChunk; j < dataSize; j += streamChunk) { - if (j < 8) { - fdrStatus = fdrExecStreaming(fdr.get(), &data[0], j, - &data[0] + j, streamChunk, 0, countCallback, - &matchesCounts, HWLM_ALL_GROUPS, nullptr); + if (j < 16) { + /* allow 16 bytes headroom on read to avoid invalid + * memory read during the FDR zone creation.*/ + memset(tempdata.data(), c, dataSize + fake_history_size); + const u8 *tmp_d = tempdata.data() + fake_history_size; + fdrStatus = fdrExecStreaming(fdr.get(), tmp_d, j, tmp_d + j, + streamChunk, 0, countCallback, + &matchesCounts, + HWLM_ALL_GROUPS, nullptr); } else { - fdrStatus = fdrExecStreaming(fdr.get(), 
&data[0] + j - 8, - 8, &data[0] + j, streamChunk, 0, countCallback, - &matchesCounts, HWLM_ALL_GROUPS, nullptr); + fdrStatus = fdrExecStreaming(fdr.get(), d + j - 8, 8, d + j, + streamChunk, 0, countCallback, + &matchesCounts, + HWLM_ALL_GROUPS, nullptr); } ASSERT_EQ(0, fdrStatus); } From f0825b4d3ff9a63447637197b452bb8505813094 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 27 Apr 2016 15:17:05 +1000 Subject: [PATCH 182/218] remove unused parts of fdr_loadval --- src/fdr/fdr_loadval.h | 174 +++------------------------------- unit/internal/fdr_loadval.cpp | 98 +------------------ 2 files changed, 13 insertions(+), 259 deletions(-) diff --git a/src/fdr/fdr_loadval.h b/src/fdr/fdr_loadval.h index 37baf823e..86c39c7f3 100644 --- a/src/fdr/fdr_loadval.h +++ b/src/fdr/fdr_loadval.h @@ -29,13 +29,12 @@ #ifndef FDR_LOADVAL_H #define FDR_LOADVAL_H -#include "fdr_internal.h" #include "ue2common.h" #include "util/unaligned.h" -#include "util/simd_utils.h" -#define MAKE_LOADVAL(type, name) \ - static really_inline type name (const u8 * ptr, UNUSED const u8 * lo, UNUSED const u8 * hi) +#define MAKE_LOADVAL(type, name) \ + static really_inline \ + type name(const u8 *ptr, UNUSED const u8 *lo, UNUSED const u8 *hi) #define NORMAL_SAFE(type) \ do { \ @@ -43,179 +42,30 @@ assert(ptr + sizeof(type) - 1 < hi); \ } while(0) -#define ALIGNED_SAFE(type) NORMAL_SAFE(type); assert(((size_t)ptr % sizeof(type)) == 0); -// these ones need asserts to test the property that we're not handling dynamically -#define CAUTIOUS_FORWARD_SAFE(type) assert(ptr >= lo) -#define CAUTIOUS_BACKWARD_SAFE(type) assert((ptr + sizeof(type) - 1) < hi) - -#define CF_INDEX_CHECK (ptr + i < hi) -#define CB_INDEX_CHECK (lo <= ptr + i) -#define CE_INDEX_CHECK (lo <= ptr + i) && (ptr + i < hi) - -#define MAKE_LOOP(TYPE, COND, SHIFT_FIDDLE) \ - TYPE v = 0; \ - for (TYPE i = 0; i < sizeof(TYPE); i++) { \ - if (COND) { \ - v += (TYPE)ptr[i] << ((SHIFT_FIDDLE)*8); \ - } \ - } \ +#define MAKE_LOOP_CE(TYPE) \ + TYPE v = 0; \ + for (TYPE i = 0; i < sizeof(TYPE); i++) { \ + if ((lo <= ptr + i) && (ptr + i < hi)) { \ + v += (TYPE)ptr[i] << (i*8); \ + } \ + } \ return v; -#define MAKE_LOOP_BE(TYPE, COND) \ - MAKE_LOOP(TYPE, COND, sizeof(TYPE)-i-1) - -#define MAKE_LOOP_LE(TYPE, COND) \ - MAKE_LOOP(TYPE, COND, i) - - -#define MAKE_LOOP_BE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CF_INDEX_CHECK) -#define MAKE_LOOP_BE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CB_INDEX_CHECK) -#define MAKE_LOOP_BE_CE(TYPE) MAKE_LOOP_BE(TYPE, CE_INDEX_CHECK) -#define MAKE_LOOP_LE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CF_INDEX_CHECK) -#define MAKE_LOOP_LE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CB_INDEX_CHECK) -#define MAKE_LOOP_LE_CE(TYPE) MAKE_LOOP_LE(TYPE, CE_INDEX_CHECK) - // no suffix = normal (unaligned) -// _a = aligned -// _cf = cautious forwards, base is always in bounds, but may read over the end of the buffer (test against hi) -// _cb = cautious backwards, final byte is always in bounds, but may read over the start of the buffer (test against lo) // _ce = cautious everywhere (in both directions); test against hi and lo -// u8 loadvals -MAKE_LOADVAL(u8, lv_u8) { - NORMAL_SAFE(u8); - return *ptr; -} - -MAKE_LOADVAL(u8, lv_u8_cf) { - CAUTIOUS_FORWARD_SAFE(u8); - if (ptr < hi) { - return *ptr; - } else { - return 0; - } -} - -MAKE_LOADVAL(u8, lv_u8_cb) { - CAUTIOUS_BACKWARD_SAFE(u8); - if (lo <= ptr) { - return *ptr; - } else { - return 0; - } -} - -MAKE_LOADVAL(u8, lv_u8_ce) { - if ((lo <= ptr) 
&& (ptr < hi)) { - return *ptr; - } else { - return 0; - } -} - MAKE_LOADVAL(u16, lv_u16) { NORMAL_SAFE(u16); return unaligned_load_u16(ptr); } -MAKE_LOADVAL(u16, lv_u16_a) { - ALIGNED_SAFE(u16); - return *(const u16 *)ptr; -} - -MAKE_LOADVAL(u32, lv_u32) { - NORMAL_SAFE(u32); - return unaligned_load_u32(ptr); -} - -MAKE_LOADVAL(u32, lv_u32_a) { - ALIGNED_SAFE(u32); - return *(const u32 *)ptr; -} - MAKE_LOADVAL(u64a, lv_u64a) { NORMAL_SAFE(u32); return unaligned_load_u64a(ptr); } -MAKE_LOADVAL(u64a, lv_u64a_a) { - ALIGNED_SAFE(u64a); - return *(const u64a *)ptr; -} - -MAKE_LOADVAL(u16, lv_u16_cf) { MAKE_LOOP_LE_CF(u16); } -MAKE_LOADVAL(u16, lv_u16_cb) { MAKE_LOOP_LE_CB(u16); } -MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_LE_CE(u16); } - -MAKE_LOADVAL(u32, lv_u32_cf) { MAKE_LOOP_LE_CF(u32); } -MAKE_LOADVAL(u32, lv_u32_cb) { MAKE_LOOP_LE_CB(u32); } -MAKE_LOADVAL(u32, lv_u32_ce) { MAKE_LOOP_LE_CE(u32); } - -MAKE_LOADVAL(u64a, lv_u64a_cf) { MAKE_LOOP_LE_CF(u64a); } -MAKE_LOADVAL(u64a, lv_u64a_cb) { MAKE_LOOP_LE_CB(u64a); } -MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_LE_CE(u64a); } - -MAKE_LOADVAL(m128, lv_m128) { - NORMAL_SAFE(m128); - return loadu128(ptr); -} - -MAKE_LOADVAL(m128, lv_m128_a) { - ALIGNED_SAFE(m128); - assert((size_t)ptr % sizeof(m128) == 0); - return *(const m128 *)ptr; -} +MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_CE(u16); } -// m128 cases need to be manually created - -MAKE_LOADVAL(m128, lv_m128_cf) { - CAUTIOUS_FORWARD_SAFE(m128); - union { - u8 val8[16]; - m128 val128; - } u; - - for (u32 i = 0; i < 16; i++) { - if (ptr + i < hi) { - u.val8[i] = ptr[i]; - } else { - u.val8[i] = 0; - } - } - return u.val128; -} - -MAKE_LOADVAL(m128, lv_m128_cb) { - CAUTIOUS_BACKWARD_SAFE(m128); - union { - u8 val8[16]; - m128 val128; - } u; - - for (u32 i = 0; i < 16; i++) { - if (lo <= ptr + i) { - u.val8[i] = ptr[i]; - } else { - u.val8[i] = 0; - } - } - return u.val128; -} - -MAKE_LOADVAL(m128, lv_m128_ce) { - union { - u8 val8[16]; - m128 val128; - } u; - - for (u32 i = 0; i < 16; i++) { - if ((lo <= ptr + i) && (ptr + i < hi)) { - u.val8[i] = ptr[i]; - } else { - u.val8[i] = 0; - } - } - return u.val128; -} +MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_CE(u64a); } #endif diff --git a/unit/internal/fdr_loadval.cpp b/unit/internal/fdr_loadval.cpp index ba5ae6920..22fee7704 100644 --- a/unit/internal/fdr_loadval.cpp +++ b/unit/internal/fdr_loadval.cpp @@ -39,55 +39,26 @@ using namespace ue2; // Normal (unaligned) load. template T lv(const u8 *ptr, const u8 *lo, const u8 *hi); -// Aligned load. -template T lv_a(const u8 *ptr, const u8 *lo, const u8 *hi); - -// Cautious forward load. -template T lv_cf(const u8 *ptr, const u8 *lo, const u8 *hi); - -// Cautious backward load. -template T lv_cb(const u8 *ptr, const u8 *lo, const u8 *hi); - // Cautious everywhere load. template T lv_ce(const u8 *ptr, const u8 *lo, const u8 *hi); -// Special case: there is no specific _a "aligned load" func for u8. We proxy -// that to the normal load. 
-static u8 lv_u8_a(const u8 *ptr, const u8 *lo, const u8 *hi) { - return lv_u8(ptr, lo, hi); -} - #define BUILD_LOADVALS(vtype) \ template <> vtype lv(const u8 *ptr, const u8 *lo, const u8 *hi) { \ return lv_##vtype(ptr, lo, hi); \ } \ - template <> vtype lv_a(const u8 *ptr, const u8 *lo, const u8 *hi) { \ - return lv_##vtype##_a(ptr, lo, hi); \ - } \ - template <> \ - vtype lv_cf(const u8 *ptr, const u8 *lo, const u8 *hi) { \ - return lv_##vtype##_cf(ptr, lo, hi); \ - } \ - template <> \ - vtype lv_cb(const u8 *ptr, const u8 *lo, const u8 *hi) { \ - return lv_##vtype##_cb(ptr, lo, hi); \ - } \ template <> \ vtype lv_ce(const u8 *ptr, const u8 *lo, const u8 *hi) { \ return lv_##vtype##_ce(ptr, lo, hi); \ } -BUILD_LOADVALS(u8) BUILD_LOADVALS(u16) -BUILD_LOADVALS(u32) BUILD_LOADVALS(u64a) -BUILD_LOADVALS(m128) template class FDR_Loadval : public testing::Test { // empty }; -typedef ::testing::Types LoadvalTypes; +typedef ::testing::Types LoadvalTypes; TYPED_TEST_CASE(FDR_Loadval, LoadvalTypes); @@ -114,73 +85,6 @@ TYPED_TEST(FDR_Loadval, Normal) { } } -TYPED_TEST(FDR_Loadval, Aligned) { - const size_t len = sizeof(TypeParam); - aligned_unique_ptr mem_p = aligned_zmalloc_unique(len); // 16 aligned - u8 * mem = mem_p.get(); - ASSERT_TRUE(ISALIGNED_16(mem)); - fillWithBytes(mem, len); - - TypeParam val = lv_a(mem, mem, mem + len); - - // Should be identical to 'mem' in byte order. - ASSERT_EQ(0, memcmp(&val, mem, len)); -} - -TYPED_TEST(FDR_Loadval, CautiousForward) { - // For a cautious forward load, we will get zeroes for all bytes after the - // 'hi' ptr. - const size_t len = sizeof(TypeParam); - - aligned_unique_ptr mem_p = aligned_zmalloc_unique(len + 1); - u8 *mem = mem_p.get() + 1; // force unaligned - fillWithBytes(mem, len); - - for (size_t i = 1; i <= len; i++) { - const u8 *ptr = mem; - const u8 *lo = ptr; - const u8 *hi = ptr + i; - union { - TypeParam val; - u8 bytes[sizeof(TypeParam)]; - } x; - - x.val = lv_cf(ptr, lo, hi); - - // Low bytes will be correct, bytes >= hi will be zero. - for (size_t j = 0; j < len; j++) { - ASSERT_EQ(j < i ? mem[j] : 0, x.bytes[j]); - } - } -} - -TYPED_TEST(FDR_Loadval, CautiousBackward) { - // For a cautious backwards load, we will get zeroes for all bytes before - // the 'lo' ptr. - const size_t len = sizeof(TypeParam); - - aligned_unique_ptr mem_p = aligned_zmalloc_unique(len + 1); - u8 *mem = mem_p.get() + 1; // force unaligned - fillWithBytes(mem, len); - - for (size_t i = 1; i <= len; i++) { - const u8 *ptr = mem; - const u8 *lo = ptr + sizeof(TypeParam) - i; - const u8 *hi = ptr + sizeof(TypeParam); - union { - TypeParam val; - u8 bytes[sizeof(TypeParam)]; - } x; - - x.val = lv_cb(ptr, lo, hi); - - // Low bytes will be zero, bytes >= lo will be correct. - for (size_t j = 0; j < len; j++) { - ASSERT_EQ(j < sizeof(TypeParam) - i ? 0 : mem[j], x.bytes[j]); - } - } -} - TYPED_TEST(FDR_Loadval, CautiousEverywhere) { // For a cautious backwards load, we will get zeroes for all bytes before // the 'lo' ptr or after the 'hi' ptr. 
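The surviving `_ce` ("cautious everywhere") loaders have a simple contract: any byte outside [lo, hi) reads as zero, and in-bounds bytes are assembled little-endian. The following standalone C sketch mirrors the MAKE_LOOP_CE expansion above to show that contract in isolation; the function and buffer names here are illustrative only and not part of the library:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for lv_u64a_ce: bytes outside [lo, hi) load as zero; in-bounds
 * bytes are combined little-endian, mirroring MAKE_LOOP_CE above. */
static uint64_t load_u64_ce(const uint8_t *ptr, const uint8_t *lo,
                            const uint8_t *hi) {
    uint64_t v = 0;
    for (unsigned i = 0; i < sizeof(v); i++) {
        if (lo <= ptr + i && ptr + i < hi) {
            v |= (uint64_t)ptr[i] << (i * 8);
        }
    }
    return v;
}

int main(void) {
    uint8_t buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    /* Only buf[2]..buf[5] are in bounds, so bytes 0-1 and 6-7 read as zero
     * and this prints 60504030000. */
    printf("%llx\n", (unsigned long long)load_u64_ce(buf, buf + 2, buf + 6));
    return 0;
}

This zero-fill behaviour is what lets the Teddy confirm paths above stitch history bytes into the loaded value when confirm_loc falls before the start of the buffer: the out-of-range low bytes come back as zero, so the shifted histBytes can simply be OR'd in.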
From 1a373d0619abb3df1881441830b68f7030e56463 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 29 Apr 2016 09:16:39 +1000 Subject: [PATCH 183/218] fdr: confVal load can be unaligned --- src/fdr/fdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index c955680bf..bd7dbe83d 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -355,7 +355,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal, ptr_main + byte - a->buf, id, a->ctxt); continue; } - u64a confVal = *(const u64a *)(confLoc + byte - sizeof(u64a)); + u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a)); confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control, last_match_id, confVal); } while (unlikely(!!*conf)); From 11896dcf424c1f9f0ab32483d8b0712c004d4b9f Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 2 May 2016 13:03:17 +1000 Subject: [PATCH 184/218] restore ability to ignore common preds for merges Check for common preds was broken during refactoring in b9c5d65: Rework literal overlap checks for merging engines --- src/rose/rose_build_merge.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 42dd4d2fd..a10bc86e9 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1125,7 +1125,7 @@ bool checkPredDelays(const RoseBuildImpl &tbi, const deque &v1, } for (auto u : preds) { - if (!contains(known_good_preds, &u)) { + if (!contains(known_good_preds, u)) { insert(&pred_lits, tbi.g[u].literals); } } From c12b9531317248c6333869e43281f9a5764fd5bc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 28 Apr 2016 09:34:37 +1000 Subject: [PATCH 185/218] runtime: add error for "scratch in use" This commit adds the HS_SCRATCH_IN_USE error, which is returned when Hyperscan detects that a scratch region is already in use on entry to an API function. --- src/hs_common.h | 21 ++- src/runtime.c | 51 +++++- src/scratch.c | 9 + src/scratch.h | 29 ++++ unit/CMakeLists.txt | 1 + unit/hyperscan/scratch_in_use.cpp | 275 ++++++++++++++++++++++++++++++ unit/hyperscan/test_util.h | 4 +- 7 files changed, 384 insertions(+), 6 deletions(-) create mode 100644 unit/hyperscan/scratch_in_use.cpp diff --git a/src/hs_common.h b/src/hs_common.h index b38d9505c..4bf31146c 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -500,6 +500,25 @@ const char *hs_version(void); */ #define HS_BAD_ALLOC (-9) +/** + * The scratch region was already in use. + * + * This error is returned when Hyperscan is able to detect that the scratch + * region given is already in use by another Hyperscan API call. + * + * A separate scratch region, allocated with @ref hs_alloc_scratch() or @ref + * hs_clone_scratch(), is required for every concurrent caller of the Hyperscan + * API. + * + * For example, this error might be returned when @ref hs_scan() has been + * called inside a callback delivered by a currently-executing @ref hs_scan() + * call using the same scratch region. + * + * Note: Not all concurrent uses of scratch regions may be detected. This error + * is intended as a best-effort debugging tool, not a guarantee. 
+ */ +#define HS_SCRATCH_IN_USE (-10) + /** @} */ #ifdef __cplusplus diff --git a/src/runtime.c b/src/runtime.c index 852eaf929..95f21d84e 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -319,8 +319,13 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, return HS_INVALID; } + if (unlikely(markScratchInUse(scratch))) { + return HS_SCRATCH_IN_USE; + } + if (rose->minWidth > length) { DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length); + unmarkScratchInUse(scratch); return HS_SUCCESS; } @@ -394,12 +399,14 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, done_scan: if (told_to_stop_matching(scratch)) { + unmarkScratchInUse(scratch); return HS_SCAN_TERMINATED; } if (rose->hasSom) { int halt = flushStoredSomMatches(scratch, ~0ULL); if (halt) { + unmarkScratchInUse(scratch); return HS_SCAN_TERMINATED; } } @@ -412,7 +419,10 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, set_retval: DEBUG_PRINTF("done. told_to_stop_matching=%d\n", told_to_stop_matching(scratch)); - return told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED : HS_SUCCESS; + hs_error_t rv = told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED + : HS_SUCCESS; + unmarkScratchInUse(scratch); + return rv; } static really_inline @@ -674,7 +684,11 @@ hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id, if (!scratch || !validScratch(to_id->rose, scratch)) { return HS_INVALID; } + if (unlikely(markScratchInUse(scratch))) { + return HS_SCRATCH_IN_USE; + } report_eod_matches(to_id, scratch, onEvent, context); + unmarkScratchInUse(scratch); } size_t stateSize @@ -784,7 +798,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, unsigned length, UNUSED unsigned flags, hs_scratch_t *scratch, match_event_handler onEvent, void *context) { - if (unlikely(!id || !scratch || !data || !validScratch(id->rose, scratch))) { + assert(id); + assert(scratch); + + if (unlikely(!data)) { return HS_INVALID; } @@ -878,8 +895,18 @@ HS_PUBLIC_API hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, unsigned length, unsigned flags, hs_scratch_t *scratch, match_event_handler onEvent, void *context) { - return hs_scan_stream_internal(id, data, length, flags, scratch, - onEvent, context); + if (unlikely(!id || !scratch || !data || + !validScratch(id->rose, scratch))) { + return HS_INVALID; + } + + if (unlikely(markScratchInUse(scratch))) { + return HS_SCRATCH_IN_USE; + } + hs_error_t rv = hs_scan_stream_internal(id, data, length, flags, scratch, + onEvent, context); + unmarkScratchInUse(scratch); + return rv; } HS_PUBLIC_API @@ -893,7 +920,11 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, if (!scratch || !validScratch(id->rose, scratch)) { return HS_INVALID; } + if (unlikely(markScratchInUse(scratch))) { + return HS_SCRATCH_IN_USE; + } report_eod_matches(id, scratch, onEvent, context); + unmarkScratchInUse(scratch); } hs_stream_free(id); @@ -913,7 +944,11 @@ hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, if (!scratch || !validScratch(id->rose, scratch)) { return HS_INVALID; } + if (unlikely(markScratchInUse(scratch))) { + return HS_SCRATCH_IN_USE; + } report_eod_matches(id, scratch, onEvent, context); + unmarkScratchInUse(scratch); } init_stream(id, id->rose); @@ -995,6 +1030,10 @@ hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data, return HS_INVALID; } + if (unlikely(markScratchInUse(scratch))) { + return HS_SCRATCH_IN_USE; + } + hs_stream_t *id = 
(hs_stream_t *)(scratch->bstate); init_stream(id, rose); /* open stream */ @@ -1009,6 +1048,7 @@ hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data, = hs_scan_stream_internal(id, data[i], length[i], 0, scratch, onEvent, context); if (ret != HS_SUCCESS) { + unmarkScratchInUse(scratch); return ret; } } @@ -1018,9 +1058,12 @@ hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data, report_eod_matches(id, scratch, onEvent, context); if (told_to_stop_matching(scratch)) { + unmarkScratchInUse(scratch); return HS_SCAN_TERMINATED; } } + unmarkScratchInUse(scratch); + return HS_SUCCESS; } diff --git a/src/scratch.c b/src/scratch.c index 42db42acd..b496833a5 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -129,6 +129,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { *s = *proto; s->magic = SCRATCH_MAGIC; + s->in_use = 1; s->scratchSize = alloc_size; s->scratch_alloc = (char *)s_tmp; @@ -254,6 +255,9 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { if ((*scratch)->magic != SCRATCH_MAGIC) { return HS_INVALID; } + if (markScratchInUse(*scratch)) { + return HS_SCRATCH_IN_USE; + } } const struct RoseEngine *rose = hs_get_bytecode(db); @@ -355,6 +359,7 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ } + unmarkScratchInUse(*scratch); return HS_SUCCESS; } @@ -384,6 +389,10 @@ hs_error_t hs_free_scratch(hs_scratch_t *scratch) { if (scratch->magic != SCRATCH_MAGIC) { return HS_INVALID; } + if (markScratchInUse(scratch)) { + return HS_SCRATCH_IN_USE; + } + scratch->magic = 0; assert(scratch->scratch_alloc); DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch, diff --git a/src/scratch.h b/src/scratch.h index 21ec809cf..f8e322f8b 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -140,6 +140,7 @@ struct match_deduper { */ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 magic; + u8 in_use; /**< non-zero when being used by an API call. */ char *scratch_alloc; /* user allocated scratch object */ u32 queueCount; u32 bStateSize; /**< sizeof block mode states */ @@ -198,6 +199,34 @@ char can_stop_matching(const struct hs_scratch *scratch) { return scratch->core_info.status & (STATUS_TERMINATED | STATUS_EXHAUSTED); } +/** + * \brief Mark scratch as in use. + * + * Returns non-zero if it was already in use, zero otherwise. + */ +static really_inline +char markScratchInUse(struct hs_scratch *scratch) { + DEBUG_PRINTF("marking scratch as in use\n"); + assert(scratch && scratch->magic == SCRATCH_MAGIC); + if (scratch->in_use) { + DEBUG_PRINTF("scratch already in use!\n"); + return 1; + } + scratch->in_use = 1; + return 0; +} + +/** + * \brief Mark scratch as no longer in use. 
+ */ +static really_inline +void unmarkScratchInUse(struct hs_scratch *scratch) { + DEBUG_PRINTF("marking scratch as not in use\n"); + assert(scratch && scratch->magic == SCRATCH_MAGIC); + assert(scratch->in_use == 1); + scratch->in_use = 0; +} + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index b3cc8cea2..a893d3d5d 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -97,6 +97,7 @@ set(unit_hyperscan_SOURCES hyperscan/multi.cpp hyperscan/order.cpp hyperscan/scratch_op.cpp + hyperscan/scratch_in_use.cpp hyperscan/serialize.cpp hyperscan/single.cpp hyperscan/som.cpp diff --git a/unit/hyperscan/scratch_in_use.cpp b/unit/hyperscan/scratch_in_use.cpp new file mode 100644 index 000000000..ddd4bf4e7 --- /dev/null +++ b/unit/hyperscan/scratch_in_use.cpp @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "test_util.h" + +#include "hs.h" +#include "gtest/gtest.h" + +#include + +using namespace std; + +struct RescanContext { + RescanContext(const hs_database_t *db_in, hs_scratch_t *scratch_in) + : db(db_in), scratch(scratch_in) {} + const hs_database_t *db; + hs_scratch_t *scratch; + size_t matches = 0; +}; + +struct HyperscanDatabaseDeleter { + void operator()(hs_database_t *db) const { + hs_error_t err = hs_free_database(db); + EXPECT_EQ(HS_SUCCESS, err); + } +}; + +unique_ptr +makeDatabase(const char *expression, unsigned int flags, unsigned int mode) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + hs_error_t err = hs_compile(expression, flags, mode, nullptr, &db, + &compile_err); + EXPECT_EQ(HS_SUCCESS, err); + + return unique_ptr(db); +} + +// Generic block mode test that uses the given scan callback. 
+static +void runBlockTest(match_event_handler cb_func) { + auto db = makeDatabase("foo.*bar", 0, HS_MODE_BLOCK); + ASSERT_NE(nullptr, db.get()); + + hs_scratch_t *scratch = nullptr; + hs_error_t err = hs_alloc_scratch(db.get(), &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + RescanContext rc(db.get(), scratch); + const string data = "___foo___bar_"; + + err = hs_scan(db.get(), data.c_str(), data.length(), 0, scratch, + cb_func, &rc); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(1, rc.matches); + + // teardown + hs_free_scratch(scratch); +} + +// Generic streaming mode test that uses the given scan callback. +static +void runStreamingTest(match_event_handler cb_func) { + auto db = makeDatabase("foo.*bar", 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db.get()); + + hs_scratch_t *scratch = nullptr; + hs_error_t err = hs_alloc_scratch(db.get(), &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + hs_stream_t *stream = nullptr; + err = hs_open_stream(db.get(), 0, &stream); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(stream != nullptr); + + RescanContext rc(db.get(), scratch); + const string data = "___foo___bar_"; + + err = hs_scan_stream(stream, data.c_str(), data.length(), 0, scratch, + cb_func, &rc); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(1, rc.matches); + + // teardown + hs_close_stream(stream, scratch, nullptr, nullptr); + hs_free_scratch(scratch); +} + +// Generic vectored mode test that uses the given scan callback. +static +void runVectoredTest(match_event_handler cb_func) { + auto db = makeDatabase("foo.*bar", 0, HS_MODE_VECTORED); + ASSERT_NE(nullptr, db.get()); + + hs_scratch_t *scratch = nullptr; + hs_error_t err = hs_alloc_scratch(db.get(), &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + RescanContext rc(db.get(), scratch); + const string data1 = "___foo_"; + const string data2 = "bar_"; + + const char *vec[] = {data1.c_str(), data2.c_str()}; + const unsigned int len[] = {unsigned(data1.length()), + unsigned(data2.length())}; + + err = hs_scan_vector(db.get(), vec, len, 2, 0, scratch, cb_func, &rc); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(1, rc.matches); + + // teardown + hs_free_scratch(scratch); +} + +static +int rescan_block_cb(unsigned, unsigned long long, unsigned long long, unsigned, + void *ctx) { + RescanContext *rctx = (RescanContext *)ctx; + rctx->matches++; + + const string data = "___foo___bar_"; + + hs_error_t err = hs_scan(rctx->db, data.c_str(), data.length(), 0, + rctx->scratch, dummy_cb, nullptr); + EXPECT_EQ(HS_SCRATCH_IN_USE, err); + return 0; +} + + +// Attempt to use in-use scratch inside block mode callback. +TEST(ScratchInUse, Block) { + runBlockTest(rescan_block_cb); +} + +static +int rescan_stream_cb(unsigned, unsigned long long, unsigned long long, unsigned, + void *ctx) { + RescanContext *rctx = (RescanContext *)ctx; + rctx->matches++; + + const string data = "___foo___bar_"; + + hs_stream_t *stream = nullptr; + hs_error_t err = hs_open_stream(rctx->db, 0, &stream); + EXPECT_EQ(HS_SUCCESS, err); + EXPECT_TRUE(stream != nullptr); + if (stream == nullptr) { + return 1; + } + + err = hs_scan_stream(stream, data.c_str(), data.length(), 0, + rctx->scratch, dummy_cb, nullptr); + EXPECT_EQ(HS_SCRATCH_IN_USE, err); + + hs_close_stream(stream, nullptr, nullptr, nullptr); + return 0; +} + +// Attempt to use in-use scratch inside streaming mode callback. 
+TEST(ScratchInUse, Streaming) { + runStreamingTest(rescan_stream_cb); +} + +static +int rescan_vector_cb(unsigned, unsigned long long, unsigned long long, unsigned, + void *ctx) { + RescanContext *rctx = (RescanContext *)ctx; + rctx->matches++; + + const string data1 = "___foo_"; + const string data2 = "bar_"; + + const char *vec[] = {data1.c_str(), data2.c_str()}; + const unsigned int len[] = {unsigned(data1.length()), + unsigned(data2.length())}; + + hs_error_t err = hs_scan_vector(rctx->db, vec, len, 2, 0, rctx->scratch, + dummy_cb, nullptr); + EXPECT_EQ(HS_SCRATCH_IN_USE, err); + return 0; +} + +// Attempt to use in-use scratch inside vectored mode callback. +TEST(ScratchInUse, Vectored) { + runVectoredTest(rescan_vector_cb); +} + +static +int rescan_realloc_cb(unsigned, unsigned long long, unsigned long long, + unsigned, void *ctx) { + RescanContext *rctx = (RescanContext *)ctx; + rctx->matches++; + + auto db = makeDatabase("another db", 0, HS_MODE_BLOCK); + hs_error_t err = hs_alloc_scratch(db.get(), &rctx->scratch); + EXPECT_EQ(HS_SCRATCH_IN_USE, err); + return 0; +} + +// Attempt to use hs_alloc_scratch on in-use scratch inside callback (block +// scan). +TEST(ScratchInUse, ReallocScratchBlock) { + runBlockTest(rescan_realloc_cb); +} + +// Attempt to use hs_alloc_scratch on in-use scratch inside callback (streaming +// scan). +TEST(ScratchInUse, ReallocScratchStreaming) { + runStreamingTest(rescan_realloc_cb); +} + +// Attempt to use hs_alloc_scratch on in-use scratch inside callback (vectored +// scan). +TEST(ScratchInUse, ReallocScratchVector) { + runVectoredTest(rescan_realloc_cb); +} + +static +int rescan_free_cb(unsigned, unsigned long long, unsigned long long, + unsigned, void *ctx) { + RescanContext *rctx = (RescanContext *)ctx; + rctx->matches++; + + hs_error_t err = hs_free_scratch(rctx->scratch); + EXPECT_EQ(HS_SCRATCH_IN_USE, err); + return 0; +} + +// Attempt to use hs_free_scratch on in-use scratch inside callback (block +// scan). +TEST(ScratchInUse, FreeScratchBlock) { + runBlockTest(rescan_free_cb); +} + +// Attempt to use hs_free_scratch on in-use scratch inside callback (streaming +// scan). +TEST(ScratchInUse, FreeScratchStreaming) { + runStreamingTest(rescan_free_cb); +} + +// Attempt to use hs_free_scratch on in-use scratch inside callback (vectored +// scan). +TEST(ScratchInUse, FreeScratchVector) { + runVectoredTest(rescan_free_cb); +} diff --git a/unit/hyperscan/test_util.h b/unit/hyperscan/test_util.h index 1ce0c182d..fad6137c1 100644 --- a/unit/hyperscan/test_util.h +++ b/unit/hyperscan/test_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,7 +29,9 @@ #ifndef TEST_UTIL_H #define TEST_UTIL_H +#include #include +#include #include #include "hs.h" From a7d02a996e59c0594b1306be9731f5a4b20a91be Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 4 May 2016 13:56:24 +1000 Subject: [PATCH 186/218] fdr_streaming_runtime: include <string.h> Required for use of memcpy().
--- src/fdr/fdr_streaming_runtime.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fdr/fdr_streaming_runtime.h b/src/fdr/fdr_streaming_runtime.h index 07dacf9de..fa5843c5d 100644 --- a/src/fdr/fdr_streaming_runtime.h +++ b/src/fdr/fdr_streaming_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,6 +32,8 @@ #include "fdr_streaming_internal.h" #include "util/partial_store.h" +#include <string.h> + static really_inline const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) { const u8 * linkPtr = ((const u8 *)fdr) + fdr->link; From b2ea64171528f3fd87e457040258dcccefa49018 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 May 2016 13:18:19 +1000 Subject: [PATCH 187/218] doc: update copyright dates --- doc/dev-reference/conf.py.in | 2 +- doc/dev-reference/copyright.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/dev-reference/conf.py.in b/doc/dev-reference/conf.py.in index 169568f44..9f089883e 100644 --- a/doc/dev-reference/conf.py.in +++ b/doc/dev-reference/conf.py.in @@ -44,7 +44,7 @@ master_doc = 'index' # General information about the project. project = u'Hyperscan' -copyright = u'2015, Intel Corporation' +copyright = u'2015-2016, Intel Corporation' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/doc/dev-reference/copyright.rst b/doc/dev-reference/copyright.rst index 88be1adcb..737b160f5 100644 --- a/doc/dev-reference/copyright.rst +++ b/doc/dev-reference/copyright.rst @@ -30,4 +30,4 @@ and/or other countries. \*Other names and brands may be claimed as the property of others. -Copyright |copy| 2015, Intel Corporation. All rights reserved. +Copyright |copy| 2015-2016, Intel Corporation. All rights reserved. From 3426bf57ec39fe588baaab260a6b8725c2f40182 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 May 2016 11:12:36 +1000 Subject: [PATCH 188/218] doc: clean up discussion of scratch space --- doc/dev-reference/runtime.rst | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/doc/dev-reference/runtime.rst b/doc/dev-reference/runtime.rst index 0f3fc5fa7..665395a83 100644 --- a/doc/dev-reference/runtime.rst +++ b/doc/dev-reference/runtime.rst @@ -124,13 +124,19 @@ databases, only a single scratch region is necessary: in this case, calling will ensure that the scratch space is large enough to support scanning against any of the given databases. -Importantly, only one such space is required per thread and can (and indeed -should) be allocated before data scanning is to commence. In a scenario where a -set of expressions are compiled by a single "master" thread and data will be -scanned by multiple "worker" threads, the convenience function -:c:func:`hs_clone_scratch` allows multiple copies of an existing scratch space -to be made for each thread (rather than forcing the caller to pass all the -compiled databases through :c:func:`hs_alloc_scratch` multiple times). +While the Hyperscan library is re-entrant, the use of scratch spaces is not. +For example, if by design it is deemed necessary to run recursive or nested +scanning (say, from the match callback function), then an additional scratch +space is required for that context.
+ +In the absence of recursive scanning, only one such space is required per thread +and can (and indeed should) be allocated before data scanning is to commence. + +In a scenario where a set of expressions are compiled by a single "master" +thread and data will be scanned by multiple "worker" threads, the convenience +function :c:func:`hs_clone_scratch` allows multiple copies of an existing +scratch space to be made for each thread (rather than forcing the caller to pass +all the compiled databases through :c:func:`hs_alloc_scratch` multiple times). For example: @@ -163,14 +169,6 @@ For example: /* Now two threads can both scan against database db, each with its own scratch space. */ -While the Hyperscan library is re-entrant, the use of scratch spaces is not. -For example, if by design it is deemed necessary to run recursive or nested -scanning (say, from the match callback function), then an additional scratch -space is required for that context. - -The easiest way to achieve this is to build up a single scratch space as a -prototype, then clone it for each context: - ***************** Custom Allocators ***************** From 8f46b7cf2e03b3f04d4ec6d3d835a508eb4694b5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 May 2016 11:57:41 +1000 Subject: [PATCH 189/218] doc: add description of serialization, runtime lib --- doc/dev-reference/index.rst | 1 + doc/dev-reference/serialization.rst | 67 +++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 doc/dev-reference/serialization.rst diff --git a/doc/dev-reference/index.rst b/doc/dev-reference/index.rst index ddc523f08..df4f89161 100644 --- a/doc/dev-reference/index.rst +++ b/doc/dev-reference/index.rst @@ -15,6 +15,7 @@ Hyperscan |version| Developer's Reference Guide getting_started compilation runtime + serialization performance api_constants api_files diff --git a/doc/dev-reference/serialization.rst b/doc/dev-reference/serialization.rst new file mode 100644 index 000000000..4f884c753 --- /dev/null +++ b/doc/dev-reference/serialization.rst @@ -0,0 +1,67 @@ +.. _serialization: + +############# +Serialization +############# + +For some applications, compiling Hyperscan pattern databases immediately prior +to use is not an appropriate design. Some users may wish to: + +* Compile pattern databases on a different host; + +* Persist compiled databases to storage and only re-compile pattern databases + when the patterns change; + +* Control the region of memory in which the compiled database is located. + +Hyperscan pattern databases are not completely flat in memory: they contain +pointers and have specific alignment requirements. Therefore, they cannot be +copied (or otherwise relocated) directly. To enable these use cases, Hyperscan +provides functionality for serializing and deserializing compiled pattern +databases. + +The API provides the following functions: + +#. :c:func:`hs_serialize_database`: serializes a pattern database into a + flat relocatable buffer of bytes. + +#. :c:func:`hs_deserialize_database`: reconstructs a newly allocated pattern + database from the output of :c:func:`hs_serialize_database`. + +#. :c:func:`hs_deserialize_database_at`: reconstructs a pattern + database at a given memory location from the output of + :c:func:`hs_serialize_database`. + +#. :c:func:`hs_serialized_database_size`: given a serialized pattern database, + returns the size of the memory block required by the database when + deserialized. + +#. 
:c:func:`hs_serialized_database_info`: given a serialized pattern database, + returns a string containing information about the database. This call is + analogous to :c:func:`hs_database_info`. + +.. note:: Hyperscan performs both version and platform compatibility checks + upon deserialization. The :c:func:`hs_deserialize_database` and + :c:func:`hs_deserialize_database_at` functions will only permit the + deserialization of databases compiled with (a) the same version of Hyperscan + and (b) platform features supported by the current host platform. See + :ref:`instr_specialization` for more information on platform specialization. + +=================== +The Runtime Library +=================== + +The main Hyperscan library (``libhs``) contains both the compiler and runtime +portions of the library. This means that in order to support the Hyperscan +compiler, which is written in C++, it requires C++ linkage and has a +dependency on the C++ standard library. + +Many embedded applications require only the scanning ("runtime") portion of the +Hyperscan library. In these cases, pattern compilation generally takes place on +another host, and serialized pattern databases are delivered to the application +for use. + +To support these applications without requiring the C++ dependency, a +runtime-only version of the Hyperscan library, called ``libhs_runtime``, is also +distributed. This library does not depend on the C++ standard library and +provides all Hyperscan functions other than those used to compile databases. From 01169e69d25c3527ed6c418c0bb9fb20c9f20dd9 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Tue, 19 Apr 2016 15:40:18 +0100 Subject: [PATCH 190/218] Fix for multiaccel matcher unit test Test was failing on 32-bit OS with AVX2 --- unit/internal/multiaccel_matcher.cpp | 32 +++++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/unit/internal/multiaccel_matcher.cpp b/unit/internal/multiaccel_matcher.cpp index 34e5c5ed8..45a24f46f 100644 --- a/unit/internal/multiaccel_matcher.cpp +++ b/unit/internal/multiaccel_matcher.cpp @@ -40,6 +40,7 @@ extern "C" { #include "nfa/multivermicelli.h" #include "nfa/multishufti.h" #include "nfa/multitruffle.h" +#include "util/alloc.h" #include "util/charreach.h" #include @@ -68,17 +69,18 @@ char getChar(const CharReach &cr, bool match) { // appends a string with matches/unmatches according to input match pattern static -void getMatch(vector &result, const string &pattern, const CharReach &cr) { +void getMatch(u8 *result, u32 start, const string &pattern, + const CharReach &cr) { for (const auto &c : pattern) { - result.push_back(getChar(cr, c == '1')); + result[start++] = getChar(cr, c == '1'); } } // appends non-matching noise of certain lengths static -void getNoise(vector &result, u32 len, const CharReach &cr) { +void getNoise(u8 *result, u32 start, u32 len, const CharReach &cr) { for (unsigned i = 0; i < len; i++) { - result.push_back(getChar(cr, false)); + result[start + i] = getChar(cr, false); } } @@ -111,7 +113,7 @@ class MultiaccelTest : public TestWithParam { const MultiaccelTestParam &p = GetParam(); // reserve space in our buffer - buffer.reserve(BUF_SIZE); + buffer = (u8 *)aligned_zmalloc(BUF_SIZE); // store the index where we expect to see the match.
note that it may // be different from where the match pattern has started since we may @@ -129,13 +131,16 @@ class MultiaccelTest : public TestWithParam { const MultiaccelTestParam &p = GetParam(); // step 1: fill prefix with non-matching noise - getNoise(buffer, p.match_pattern_start_idx, cr); + u32 start = 0; + getNoise(buffer, start, p.match_pattern_start_idx, cr); // step 2: add a match - getMatch(buffer, p.match_pattern, cr); + start += p.match_pattern_start_idx; + getMatch(buffer, start, p.match_pattern, cr); // step 3: fill in the rest of the buffer with non-matching noise - getNoise(buffer, BUF_SIZE - p.match_pattern.size() - + start += p.match_pattern.size(); + getNoise(buffer, start, BUF_SIZE - p.match_pattern.size() - p.match_pattern_start_idx, cr); } @@ -159,24 +164,25 @@ class MultiaccelTest : public TestWithParam { } virtual void TearDown() { + aligned_free(buffer); } u32 match_idx; - vector buffer; + u8 *buffer; bool test_all_offsets; }; static -void runTest(const vector &buffer, AccelAux *aux, unsigned match_idx, +void runTest(const u8 *buffer, AccelAux *aux, unsigned match_idx, bool test_all_offsets) { - const u8 *start = buffer.data(); - const u8 *end = start + buffer.size(); + const u8 *start = buffer; + const u8 *end = start + BUF_SIZE; const u8 *match = start + match_idx; // comparing indexes into the buffer is easier to understand than pointers if (test_all_offsets) { // run_accel can only scan >15 byte buffers - u32 end_offset = min(match_idx, (u32) buffer.size() - 15); + u32 end_offset = min(match_idx, BUF_SIZE - 15); for (unsigned offset = 0; offset < end_offset; offset++) { const u8 *ptr = run_accel(aux, (start + offset), end); From 38797abb066fe083ca0d4e900d46697e7cd2388c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 6 May 2016 11:32:36 +1000 Subject: [PATCH 191/218] unit: fix bug in teddy model unit tests --- unit/internal/fdr.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index 3aada867f..d10c25f0c 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -59,8 +59,8 @@ using namespace ue2; #define CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint) \ { \ auto descr = getTeddyDescription(hint); \ - if (descr && fdr != nullptr) { \ - return; \ + if (descr && fdr == nullptr) { \ + return; /* cannot build Teddy for this set of literals */ \ } else { \ ASSERT_TRUE(fdr != nullptr); \ } \ From f532b89776f537348345d24a45bc22278357f87a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 6 May 2016 11:38:24 +1000 Subject: [PATCH 192/218] unit: clean up getValidFdrEngines() --- unit/internal/fdr.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index d10c25f0c..ddfa8bb15 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -133,23 +133,25 @@ hwlmcb_rv_t decentCallbackT(size_t start, size_t end, u32 id, void *ctxt) { } // namespace -static vector getValidFdrEngines() { +static +vector getValidFdrEngines() { + const auto target = get_current_target(); + vector ret; - vector des; - getFdrDescriptions(&des); - for (vector::const_iterator it = des.begin(); - it != des.end(); ++it) { - if (it->isValidOnTarget(get_current_target())) { 
- ret.push_back(it->getID()); + + vector fdr_descriptions; + getFdrDescriptions(&fdr_descriptions); + for (const FDREngineDescription &d : fdr_descriptions) { + if (d.isValidOnTarget(target)) { + ret.push_back(d.getID()); } } - vector tDes; - getTeddyDescriptions(&tDes); - for (vector::const_iterator it = tDes.begin(); - it != tDes.end(); ++it) { - if (it->isValidOnTarget(get_current_target())) { - ret.push_back(it->getID()); + vector teddy_descriptions; + getTeddyDescriptions(&teddy_descriptions); + for (const TeddyEngineDescription &d : teddy_descriptions) { + if (d.isValidOnTarget(target)) { + ret.push_back(d.getID()); } } From 6c1143a264e89eef01818dd80db0b4ca86f7ab32 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 6 May 2016 14:20:38 +1000 Subject: [PATCH 193/218] ue2string: fix broken function type in upperString Silences warning from ubsan. --- src/util/ue2string.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index 6fdc57ba5..7c16aa587 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -129,7 +129,9 @@ string dumpString(const ue2_literal &lit) { #endif void upperString(string &s) { - transform(s.begin(), s.end(), s.begin(), (int(*)(int)) mytoupper); + for (auto &c : s) { + c = mytoupper(c); + } } size_t maxStringOverlap(const string &a, const string &b, bool nocase) { From 73610c0b643ea739b251d434e1174d574b771bdc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 9 May 2016 09:29:44 +1000 Subject: [PATCH 194/218] scratch: don't leave in use after hs_clone_scratch Also updated unit tests to always check hs_alloc_scratch()'s return value. --- src/scratch.c | 6 +- unit/hyperscan/arg_checks.cpp | 149 ++++++++++++++++++++---------- unit/hyperscan/behaviour.cpp | 101 +++++++++++++------- unit/hyperscan/extparam.cpp | 11 ++- unit/hyperscan/identical.cpp | 11 ++- unit/hyperscan/multi.cpp | 27 ++++-- unit/hyperscan/order.cpp | 26 ++++-- unit/hyperscan/scratch_in_use.cpp | 9 +- unit/hyperscan/scratch_op.cpp | 17 ++-- unit/hyperscan/single.cpp | 29 ++++-- unit/hyperscan/som.cpp | 8 +- unit/hyperscan/stream_op.cpp | 50 ++++++---- 12 files changed, 293 insertions(+), 151 deletions(-) diff --git a/src/scratch.c b/src/scratch.c index b496833a5..d8742e7dd 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -129,7 +129,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { *s = *proto; s->magic = SCRATCH_MAGIC; - s->in_use = 1; + s->in_use = 0; s->scratchSize = alloc_size; s->scratch_alloc = (char *)s_tmp; @@ -357,9 +357,10 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { } } else { hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ + unmarkScratchInUse(*scratch); } - unmarkScratchInUse(*scratch); + assert(!(*scratch)->in_use); return HS_SUCCESS; } @@ -376,6 +377,7 @@ hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest) { return ret; } + assert(!(*dest)->in_use); return HS_SUCCESS; } diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp index 6d4e5fa9a..d277a26bf 100644 --- a/unit/hyperscan/arg_checks.cpp +++ b/unit/hyperscan/arg_checks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -404,7 +404,8 @@ TEST(HyperscanArgChecks, ScanStreamNoStreamID) { 
EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -430,8 +431,10 @@ TEST(HyperscanArgChecks, ScanStreamNoData) { EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_close_stream(stream, scratch, dummy_cb, nullptr); - hs_free_scratch(scratch); + err = hs_close_stream(stream, scratch, dummy_cb, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -457,8 +460,10 @@ TEST(HyperscanArgChecks, ScanStreamNoScratch) { EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_close_stream(stream, scratch, dummy_cb, nullptr); - hs_free_scratch(scratch); + err = hs_close_stream(stream, scratch, dummy_cb, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -479,7 +484,8 @@ TEST(HyperscanArgChecks, CloseStreamNoStream) { ASSERT_NE(HS_SUCCESS, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -544,7 +550,8 @@ TEST(HyperscanArgChecks, CloseStreamNoMatchNoStream) { ASSERT_NE(HS_SUCCESS, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -588,7 +595,8 @@ TEST(HyperscanArgChecks, ChangeStreamContext) { // teardown hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } // hs_reset_stream: Call with no stream id @@ -606,7 +614,8 @@ TEST(HyperscanArgChecks, ResetStreamNoId) { err = hs_reset_stream(nullptr, 0, scratch, dummy_cb, nullptr); ASSERT_EQ(HS_INVALID, err); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -632,7 +641,8 @@ TEST(HyperscanArgChecks, ResetStreamNoScratch) { ASSERT_EQ(HS_SUCCESS, err); hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -664,7 +674,8 @@ TEST(HyperscanArgChecks, CopyStreamNoToId) { ASSERT_EQ(HS_SUCCESS, err); hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -687,8 +698,10 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamNoToId) { err = hs_reset_and_copy_stream(nullptr, stream, scratch, nullptr, nullptr); ASSERT_EQ(HS_INVALID, err); - hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_close_stream(stream, scratch, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -711,8 +724,10 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamNoFromId) { err = hs_reset_and_copy_stream(stream, nullptr, scratch, nullptr, nullptr); ASSERT_EQ(HS_INVALID, err); - hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_close_stream(stream, scratch, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -735,8 +750,10 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamSameToId) { err = hs_reset_and_copy_stream(stream, stream, scratch, nullptr, nullptr); ASSERT_EQ(HS_INVALID, err); - hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err 
= hs_close_stream(stream, scratch, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -764,9 +781,12 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamNoCallbackOrScratch) { err = hs_reset_and_copy_stream(stream_to, stream, nullptr, nullptr, nullptr); ASSERT_EQ(HS_SUCCESS, err); - hs_close_stream(stream_to, scratch, nullptr, nullptr); - hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_close_stream(stream_to, scratch, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_close_stream(stream, scratch, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -795,9 +815,12 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) { nullptr); ASSERT_EQ(HS_INVALID, err); - hs_close_stream(stream_to, scratch, nullptr, nullptr); - hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_close_stream(stream_to, scratch, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_close_stream(stream, scratch, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -830,9 +853,12 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamDiffDb) { nullptr); ASSERT_EQ(HS_INVALID, err); - hs_close_stream(stream_to, scratch, nullptr, nullptr); - hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_close_stream(stream_to, scratch, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_close_stream(stream, scratch, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); hs_free_database(db2); } @@ -854,7 +880,8 @@ TEST(HyperscanArgChecks, ScanBlockNoDatabase) { EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -878,7 +905,8 @@ TEST(HyperscanArgChecks, ScanBlockBrokenDatabaseMagic) { ASSERT_EQ(HS_INVALID, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); free(db); } @@ -902,7 +930,8 @@ TEST(HyperscanArgChecks, ScanBlockBrokenDatabaseVersion) { ASSERT_EQ(HS_DB_VERSION_ERROR, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -926,7 +955,8 @@ TEST(HyperscanArgChecks, ScanBlockBrokenDatabaseBytecode) { ASSERT_EQ(HS_INVALID, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -947,7 +977,8 @@ TEST(HyperscanArgChecks, ScanBlockStreamingDatabase) { ASSERT_EQ(HS_DB_MODE_ERROR, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -967,7 +998,8 @@ TEST(HyperscanArgChecks, ScanBlockVectoredDatabase) { ASSERT_EQ(HS_DB_MODE_ERROR, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -989,7 +1021,8 @@ TEST(HyperscanArgChecks, ScanBlockNoData) { EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1026,7 +1059,8 @@ TEST(HyperscanArgChecks, ScanBlockNoHandler) { 
EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1049,7 +1083,8 @@ TEST(HyperscanArgChecks, ScanVectorNoDatabase) { EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1075,7 +1110,8 @@ TEST(HyperscanArgChecks, ScanVectorBrokenDatabaseMagic) { ASSERT_EQ(HS_INVALID, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); free(db); } @@ -1101,7 +1137,8 @@ TEST(HyperscanArgChecks, ScanVectorBrokenDatabaseVersion) { ASSERT_EQ(HS_DB_VERSION_ERROR, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1127,7 +1164,8 @@ TEST(HyperscanArgChecks, ScanVectorBrokenDatabaseBytecode) { ASSERT_EQ(HS_INVALID, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1150,7 +1188,8 @@ TEST(HyperscanArgChecks, ScanVectorStreamingDatabase) { ASSERT_EQ(HS_DB_MODE_ERROR, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1172,7 +1211,8 @@ TEST(HyperscanArgChecks, ScanVectorBlockDatabase) { ASSERT_EQ(HS_DB_MODE_ERROR, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1195,7 +1235,8 @@ TEST(HyperscanArgChecks, ScanVectorNoDataArray) { EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1218,7 +1259,8 @@ TEST(HyperscanArgChecks, ScanVectorNoDataBlock) { EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1240,7 +1282,8 @@ TEST(HyperscanArgChecks, ScanVectorNoLenArray) { EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1281,7 +1324,8 @@ TEST(HyperscanArgChecks, ScanVectorNoHandler) { EXPECT_NE(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1421,7 +1465,8 @@ TEST(HyperscanArgChecks, AllocScratchBadDatabaseCRC) { hs_scratch_t *scratch = nullptr; err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); // for want of a better case, corrupt the "middle byte" of the database. 
char *mid = (char *)db + len/2; @@ -1959,7 +2004,8 @@ TEST(HyperscanArgChecks, ScratchSizeNoSize) { err = hs_scratch_size(scratch, nullptr); ASSERT_EQ(HS_INVALID, err); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -2039,7 +2085,8 @@ TEST(HyperscanArgChecks, ScanStreamBadScratch) { ASSERT_TRUE(scratch != nullptr); hs_close_stream(stream, scratch, nullptr, nullptr); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); free(local_garbage); } @@ -2073,7 +2120,8 @@ TEST(HyperscanArgChecks, ResetStreamBadScratch) { ASSERT_TRUE(scratch != nullptr); hs_close_stream(stream, scratch, nullptr, nullptr); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); free(local_garbage); } @@ -2116,7 +2164,8 @@ TEST(HyperscanArgChecks, ScanFreedScratch) { err = hs_alloc_scratch(db, &scratch); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(scratch != NULL); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); err = hs_scan(db, "data", 4, 0, scratch, dummy_cb, 0); ASSERT_EQ(HS_INVALID, err); EXPECT_NE(HS_SCAN_TERMINATED, err); diff --git a/unit/hyperscan/behaviour.cpp b/unit/hyperscan/behaviour.cpp index 98371c863..a816969c5 100644 --- a/unit/hyperscan/behaviour.cpp +++ b/unit/hyperscan/behaviour.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -118,7 +118,8 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) { ASSERT_EQ(HS_SUCCESS, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -197,7 +198,8 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) { } // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -287,7 +289,8 @@ TEST_P(HyperscanScanGigabytesMatch, BlockMatch) { } // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -349,7 +352,8 @@ TEST(HyperscanTestBehaviour, StreamingThereCanBeOnlyOne) { ASSERT_EQ(HS_SUCCESS, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -379,7 +383,8 @@ TEST(HyperscanTestBehaviour, BlockThereCanBeOnlyOne) { EXPECT_EQ(1U, matchCount); // only one match // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -425,7 +430,8 @@ TEST_P(HyperscanLiteralLengthTest, FloatingBlock) { EXPECT_EQ(0U, matchCount); // no matches // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -463,7 +469,8 @@ TEST_P(HyperscanLiteralLengthTest, AnchoredBlock) { // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -505,7 +512,8 @@ TEST_P(CallbackReturnStop, Block) { ASSERT_EQ(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -543,7 +551,8 @@ TEST_P(CallbackReturnStop, Streaming) { ASSERT_EQ(HS_SUCCESS, err); // 
teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -575,7 +584,8 @@ TEST_P(CallbackReturnStop, Vectored) { ASSERT_EQ(HS_SCAN_TERMINATED, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -642,7 +652,8 @@ TEST(HyperscanTestBehaviour, SerializedDogfood1) { ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(len, lastMatchTo); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db2); } @@ -734,7 +745,8 @@ TEST(HyperscanTestBehaviour, SerializedDogfood2) { ASSERT_EQ(len2, lastMatchTo); ASSERT_EQ(ids[3], lastMatchId); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db2); } @@ -805,7 +817,8 @@ TEST(HyperscanTestBehaviour, SerializedDogfood3) { ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(len, lastMatchTo); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); free(mem); } @@ -840,7 +853,8 @@ TEST(HyperscanTestBehaviour, CloseStreamMatch) { EXPECT_EQ(1U, matchCount); // our match was returned // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -876,7 +890,8 @@ TEST(HyperscanTestBehaviour, NoMainCB) { EXPECT_EQ(1U, matchCount); // our match was returned // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -913,7 +928,8 @@ TEST(HyperscanTestBehaviour, CloseStreamNoMatch) { EXPECT_EQ(0U, matchCount); // no match was returned // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -948,7 +964,8 @@ TEST(HyperscanTestBehaviour, CloseStreamAfterTermination) { ASSERT_EQ(HS_SUCCESS, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -976,7 +993,8 @@ TEST(HyperscanTestBehaviour, Vectored1) { EXPECT_EQ(1U, matchCount); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1004,7 +1022,8 @@ TEST(HyperscanTestBehaviour, Vectored2) { EXPECT_EQ(1U, matchCount); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1032,7 +1051,8 @@ TEST(HyperscanTestBehaviour, Vectored3) { EXPECT_EQ(1U, matchCount); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1060,7 +1080,8 @@ TEST(HyperscanTestBehaviour, Vectored4) { EXPECT_EQ(1U, matchCount); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1088,7 +1109,8 @@ TEST(HyperscanTestBehaviour, Vectored5) { EXPECT_EQ(0U, matchCount); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1116,7 +1138,8 @@ TEST(HyperscanTestBehaviour, Vectored6) { EXPECT_EQ(1U, matchCount); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1144,7 +1167,8 @@ TEST(HyperscanTestBehaviour, Vectored7) { EXPECT_EQ(1U, matchCount); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + 
ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1189,7 +1213,8 @@ TEST(regression, UE_1005) { != c.matches.end()); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1293,7 +1318,8 @@ TEST(regression, UE_2762) { c.matches.end()); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1333,7 +1359,8 @@ TEST(regression, UE_2763) { c.matches.end()); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1378,7 +1405,8 @@ TEST(regression, UE_2798) { c.matches.end()); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1403,7 +1431,8 @@ TEST(PcreSpace, NewPcre) { EXPECT_EQ(data.size(), matchCount); // all are spaces // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1428,7 +1457,8 @@ TEST(PcreSpace, NewPcreClass) { EXPECT_EQ(data.size(), matchCount); // all are spaces // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1453,7 +1483,8 @@ TEST(PcreSpace, NewPcreNeg) { EXPECT_EQ(0, matchCount); // no matches, all are spaces // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1478,7 +1509,8 @@ TEST(PcreSpace, NewPcreClassNeg) { EXPECT_EQ(0, matchCount); // no matches, all are spaces // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -1502,7 +1534,8 @@ TEST(Parser, NewlineTerminatedComment) { EXPECT_EQ(1, matchCount); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } diff --git a/unit/hyperscan/extparam.cpp b/unit/hyperscan/extparam.cpp index ba64132e3..e9919be61 100644 --- a/unit/hyperscan/extparam.cpp +++ b/unit/hyperscan/extparam.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -70,7 +70,8 @@ TEST(ExtParam, LargeMinOffset) { ASSERT_EQ(1U, c.matches.size()); ASSERT_EQ(MatchRecord(100000, 0), c.matches[0]); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -116,7 +117,8 @@ TEST(ExtParam, LargeExactOffset) { ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(0U, c.matches.size()); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -153,6 +155,7 @@ TEST(ExtParam, LargeMinLength) { ASSERT_EQ(1U, c.matches.size()); ASSERT_EQ(MatchRecord(110000, 0), c.matches[0]); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } diff --git a/unit/hyperscan/identical.cpp b/unit/hyperscan/identical.cpp index a4f1fe447..cbeb02422 100644 --- a/unit/hyperscan/identical.cpp +++ b/unit/hyperscan/identical.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted 
provided that the following conditions are met: @@ -64,7 +64,8 @@ TEST_P(IdenticalTest, Block) { record_cb, &cb); ASSERT_EQ(HS_SUCCESS, err); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); ASSERT_EQ(patterns.size(), cb.matches.size()); @@ -111,7 +112,8 @@ TEST_P(IdenticalTest, Stream) { err = hs_close_stream(stream, scratch, record_cb, &cb); ASSERT_EQ(HS_SUCCESS, err); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); ASSERT_EQ(patterns.size(), cb.matches.size()); @@ -151,7 +153,8 @@ TEST_P(IdenticalTest, Vectored) { err = hs_scan_vector(db, data, datalen, 1, 0, scratch, record_cb, &cb); ASSERT_EQ(HS_SUCCESS, err); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); ASSERT_EQ(patterns.size(), cb.matches.size()); diff --git a/unit/hyperscan/multi.cpp b/unit/hyperscan/multi.cpp index db8d5715f..d43ce3274 100644 --- a/unit/hyperscan/multi.cpp +++ b/unit/hyperscan/multi.cpp @@ -67,7 +67,8 @@ TEST(MMAdaptor, norm_cont1) { // UE-901 ASSERT_EQ(MatchRecord(12, 31), c.matches[2]); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } TEST(MMAdaptor, norm_cont2) { @@ -99,7 +100,8 @@ TEST(MMAdaptor, norm_cont2) { ASSERT_TRUE(c.matches.end() != find(c.matches.begin(), c.matches.end(), MatchRecord(28, 31))); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } TEST(MMAdaptor, norm_halt1) { @@ -129,7 +131,8 @@ TEST(MMAdaptor, norm_halt1) { ASSERT_EQ(MatchRecord(4, 30), c.matches[0]); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } TEST(MMAdaptor, norm_halt2) { // UE-901 @@ -159,7 +162,8 @@ TEST(MMAdaptor, norm_halt2) { // UE-901 ASSERT_EQ(MatchRecord(20, 30), c.matches[0]); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } TEST(MMAdaptor, high_cont1) { // UE-901 @@ -190,7 +194,8 @@ TEST(MMAdaptor, high_cont1) { // UE-901 ASSERT_TRUE(c.matches.end() != find(c.matches.begin(), c.matches.end(), MatchRecord(12, 31))); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } TEST(MMAdaptor, high_cont2) { @@ -221,7 +226,8 @@ TEST(MMAdaptor, high_cont2) { ASSERT_TRUE(c.matches.end() != find(c.matches.begin(), c.matches.end(), MatchRecord(28, 31))); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } TEST(MMAdaptor, high_halt1) { @@ -251,7 +257,8 @@ TEST(MMAdaptor, high_halt1) { ASSERT_EQ(MatchRecord(4, 30), c.matches[0]); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } TEST(MMAdaptor, high_halt2) { @@ -282,7 +289,8 @@ TEST(MMAdaptor, high_halt2) { || MatchRecord(28, 31) == c.matches[0]); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } TEST(MPV, UE_2395) { @@ -322,5 +330,6 @@ TEST(MPV, UE_2395) { ASSERT_EQ(300, seen); hs_free_database(db); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } diff --git a/unit/hyperscan/order.cpp b/unit/hyperscan/order.cpp index 4c44b4bb7..b052e4b49 100644 --- a/unit/hyperscan/order.cpp +++ b/unit/hyperscan/order.cpp @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -91,7 +91,8 @@ TEST(order, ordering1) { EXPECT_EQ(5U, countMatchesById(c.matches, 4)); EXPECT_EQ(5U, countMatchesById(c.matches, 5)); ASSERT_TRUE(matchesOrdered(c.matches)); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -122,7 +123,8 @@ TEST(order, ordering2) { EXPECT_EQ(5U, countMatchesById(c.matches, 4)); EXPECT_EQ(5U, countMatchesById(c.matches, 5)); ASSERT_TRUE(matchesOrdered(c.matches)); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -152,7 +154,8 @@ TEST(order, ordering3) { EXPECT_EQ(5U, countMatchesById(c.matches, 4)); EXPECT_EQ(0U, countMatchesById(c.matches, 5)); ASSERT_TRUE(matchesOrdered(c.matches)); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -182,7 +185,8 @@ TEST(order, ordering4) { EXPECT_EQ(0U, countMatchesById(c.matches, 4)); EXPECT_EQ(0U, countMatchesById(c.matches, 5)); ASSERT_TRUE(matchesOrdered(c.matches)); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -225,7 +229,8 @@ TEST(order, ordering5) { c.matches.clear(); hs_close_stream(stream, scratch, record_cb, (void *)&c); } - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -267,7 +272,8 @@ TEST(order, ordering6) { c.matches.clear(); hs_close_stream(stream, scratch, record_cb, (void *)&c); } - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -308,7 +314,8 @@ TEST(order, ordering7) { c.matches.clear(); hs_close_stream(stream, scratch, record_cb, (void *)&c); } - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -349,7 +356,8 @@ TEST(order, ordering8) { c.matches.clear(); hs_close_stream(stream, scratch, record_cb, (void *)&c); } - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } diff --git a/unit/hyperscan/scratch_in_use.cpp b/unit/hyperscan/scratch_in_use.cpp index ddd4bf4e7..a9e8c8004 100644 --- a/unit/hyperscan/scratch_in_use.cpp +++ b/unit/hyperscan/scratch_in_use.cpp @@ -83,7 +83,8 @@ void runBlockTest(match_event_handler cb_func) { ASSERT_EQ(1, rc.matches); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } // Generic streaming mode test that uses the given scan callback. @@ -112,7 +113,8 @@ void runStreamingTest(match_event_handler cb_func) { // teardown hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } // Generic vectored mode test that uses the given scan callback. 
@@ -139,7 +141,8 @@ void runVectoredTest(match_event_handler cb_func) { ASSERT_EQ(1, rc.matches); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } static diff --git a/unit/hyperscan/scratch_op.cpp b/unit/hyperscan/scratch_op.cpp index b3d3ddd99..338a0427c 100644 --- a/unit/hyperscan/scratch_op.cpp +++ b/unit/hyperscan/scratch_op.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -75,7 +75,8 @@ TEST(scratch, testAlloc) { ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(last_alloc_size, curr_size); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); hs_set_allocator(nullptr, nullptr); } @@ -114,7 +115,8 @@ TEST(scratch, testScratchAlloc) { ASSERT_EQ(allocated_count, curr_size); ASSERT_EQ(allocated_count_b, old_b); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); hs_set_allocator(nullptr, nullptr); @@ -185,7 +187,8 @@ TEST(scratch, testScratchRealloc) { ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(last_alloc_size, curr_size); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); hs_free_database(db2); hs_set_scratch_allocator(nullptr, nullptr); @@ -217,7 +220,8 @@ TEST(scratch, tooSmallForDatabase) { err = hs_scan(db2, "somedata", 8, 0, scratch, dummy_cb, nullptr); ASSERT_EQ(HS_SUCCESS, err); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db2); } @@ -252,7 +256,8 @@ TEST(scratch, tooSmallForDatabase2) { err = hs_close_stream(stream, scratch, nullptr, nullptr); ASSERT_EQ(HS_SUCCESS, err); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db2); } diff --git a/unit/hyperscan/single.cpp b/unit/hyperscan/single.cpp index 4bb0f2048..029d223ae 100644 --- a/unit/hyperscan/single.cpp +++ b/unit/hyperscan/single.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -157,7 +157,8 @@ class HyperscanTestRuntime } // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } virtual void zeroLengthScan() { @@ -195,7 +196,8 @@ class HyperscanTestRuntime } // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } // Can we allocate and clone scratch @@ -212,8 +214,10 @@ class HyperscanTestRuntime ASSERT_EQ(HS_SUCCESS, err); EXPECT_TRUE(cloned != nullptr); - hs_free_scratch(scratch); - hs_free_scratch(cloned); + err = hs_free_scratch(scratch); + EXPECT_EQ(HS_SUCCESS, err); + err = hs_free_scratch(cloned); + EXPECT_EQ(HS_SUCCESS, err); } // Can we scan with the database (ignoring the matches, and using @@ -412,7 +416,8 @@ class HyperscanTestCrossCompile // that platform produces HS_SUCCESS. if (err == HS_SUCCESS) { // host platform. 
- hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); } else { ASSERT_EQ(HS_DB_PLATFORM_ERROR, err); ASSERT_TRUE(!scratch); @@ -519,7 +524,8 @@ TEST_P(HyperscanTestMatchTerminate, MoreThanOne) { ASSERT_EQ(HS_SUCCESS, err) << "hs_scan didn't return HS_SCAN_TERMINATED"; ASSERT_LT(1, count) << "Number of matches returned was not greater than 1."; - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -538,7 +544,8 @@ TEST_P(HyperscanTestMatchTerminate, Block) { << "hs_scan didn't return HS_SCAN_TERMINATED"; ASSERT_EQ(1, count) << "Number of matches returned was not 1."; - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -563,7 +570,8 @@ TEST_P(HyperscanTestMatchTerminate, StreamWhole) { err = hs_close_stream(stream, scratch, terminateHandler, &count); ASSERT_EQ(1, count) << "Number of matches returned was not 1."; - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -591,7 +599,8 @@ TEST_P(HyperscanTestMatchTerminate, StreamByteByByte) { err = hs_close_stream(stream, scratch, terminateHandler, &count); ASSERT_EQ(1, count) << "Number of matches returned was not 1."; - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } diff --git a/unit/hyperscan/som.cpp b/unit/hyperscan/som.cpp index cc1077d28..bf2d7c427 100644 --- a/unit/hyperscan/som.cpp +++ b/unit/hyperscan/som.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -132,7 +132,8 @@ TEST_P(SomTest, PastHorizon) { ASSERT_EQ(HS_SUCCESS, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -190,7 +191,8 @@ TEST_P(SomTest, NearHorizon) { ASSERT_EQ(HS_SUCCESS, err); // teardown - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } diff --git a/unit/hyperscan/stream_op.cpp b/unit/hyperscan/stream_op.cpp index a78e08a1b..48cb6b8d2 100644 --- a/unit/hyperscan/stream_op.cpp +++ b/unit/hyperscan/stream_op.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,7 +82,8 @@ TEST(StreamUtil, reset1) { ASSERT_EQ(MatchRecord(1009, 0), c2.matches[0]); hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -122,7 +123,8 @@ TEST(StreamUtil, reset2) { ASSERT_EQ(MatchRecord(9, 0), c2.matches[0]); hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -150,7 +152,8 @@ TEST(StreamUtil, reset_matches) { ASSERT_EQ(MatchRecord(9, 0), c.matches[0]); hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -198,7 +201,8 @@ TEST(StreamUtil, copy1) { hs_close_stream(stream, scratch, 
nullptr, nullptr); hs_close_stream(stream2, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -241,7 +245,8 @@ TEST(StreamUtil, copy2) { hs_close_stream(stream, scratch, nullptr, nullptr); hs_close_stream(stream2, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -284,7 +289,8 @@ TEST(StreamUtil, copy_reset1) { hs_close_stream(stream, scratch, nullptr, nullptr); hs_close_stream(stream2, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -328,7 +334,8 @@ TEST(StreamUtil, copy_reset2) { hs_close_stream(stream, scratch, nullptr, nullptr); hs_close_stream(stream2, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -380,7 +387,8 @@ TEST(StreamUtil, copy_reset3) { hs_close_stream(stream, scratch, nullptr, nullptr); hs_close_stream(stream2, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -427,7 +435,8 @@ TEST(StreamUtil, copy_reset4) { hs_close_stream(stream, scratch, nullptr, nullptr); hs_close_stream(stream2, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -483,7 +492,8 @@ TEST(StreamUtil, copy_reset5) { hs_close_stream(stream, scratch, nullptr, nullptr); hs_close_stream(stream2, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -519,7 +529,8 @@ TEST(StreamUtil, copy_reset_matches) { hs_close_stream(stream, scratch, nullptr, nullptr); hs_close_stream(stream2, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -554,7 +565,8 @@ TEST(StreamUtil, size) { hs_set_allocator(nullptr, nullptr); hs_close_stream(stream, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); } @@ -629,7 +641,8 @@ TEST(StreamUtil, Alloc) { hs_close_stream(stream, scratch, record_cb, (void *)&c); ASSERT_EQ(alloc_called, 0); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); hs_free_compile_error(compile_err); hs_set_allocator(nullptr, nullptr); @@ -670,7 +683,8 @@ TEST(StreamUtil, MoreAlloc) { hs_close_stream(stream, scratch, nullptr, nullptr); ASSERT_EQ(alloc_called, 2); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); ASSERT_EQ(alloc_called, 1); hs_free_database(db); ASSERT_EQ(alloc_called, 0); @@ -702,7 +716,8 @@ TEST(StreamUtil, BadStreamAlloc) { err = hs_open_stream(db, 0, &stream); ASSERT_NE(HS_SUCCESS, err); - hs_free_scratch(scratch); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); hs_free_compile_error(compile_err); hs_set_stream_allocator(nullptr, nullptr); @@ -758,7 +773,8 @@ TEST(StreamUtil, StreamAllocUsage) { hs_close_stream(stream, scratch, nullptr, nullptr); hs_close_stream(stream2, scratch, nullptr, nullptr); hs_close_stream(stream3, scratch, nullptr, nullptr); - hs_free_scratch(scratch); + err = 
hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); hs_free_database(db); hs_free_compile_error(compile_err); hs_set_allocator(nullptr, nullptr); From 6899cab37038f2493de147a264845f559ca93736 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 9 May 2016 13:51:50 +1000 Subject: [PATCH 195/218] unit-internal: fix FDR issues reported by ASan FDR's streaming mode now assumes that it is safe to read 16 bytes before the end of the history buffer, and this was not reflected in the unit tests. --- unit/internal/fdr.cpp | 57 ++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index ddfa8bb15..c66ab4c5b 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -330,14 +330,32 @@ TEST_P(FDRp, NoRepeat3) { EXPECT_EQ(match(31, 32, 0), matches[0]); } +/** + * \brief Helper function wrapping the FDR streaming call that ensures it is + * always safe to read 16 bytes before the end of the history buffer. + */ +static +hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen, + const u8 *buf, size_t len, size_t start, + HWLMCallback cb, void *ctxt, hwlm_group_t groups, + u8 *stream_state) { + array wrapped_history = {{'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}}; + if (hlen < 16) { + u8 *new_hbuf = wrapped_history.data() + 16 - hlen; + memcpy(new_hbuf, hbuf, hlen); + hbuf = new_hbuf; + } + return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups, + stream_state); +} TEST_P(FDRp, SmallStreaming) { const u32 hint = GetParam(); SCOPED_TRACE(hint); - vector lits; - lits.push_back(hwlmLiteral("a", 1, 1)); - lits.push_back(hwlmLiteral("aardvark", 0, 10)); + vector lits = {hwlmLiteral("a", 1, 1), + hwlmLiteral("aardvark", 0, 10)}; auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); @@ -347,8 +365,8 @@ TEST_P(FDRp, SmallStreaming) { expected.push_back(match(1, 1, 1)); expected.push_back(match(2, 2, 1)); - fdrExecStreaming(fdr.get(), (const u8 *)"", 0, (const u8 *)"aaar", 4, 0, - decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); + safeExecStreaming(fdr.get(), (const u8 *)"", 0, (const u8 *)"aaar", 4, 0, + decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i]); } @@ -359,8 +377,8 @@ TEST_P(FDRp, SmallStreaming) { expected.push_back(match(6, 6, 1)); expected.push_back(match(1, 8, 10)); - fdrExecStreaming(fdr.get(), (const u8 *)"aaar", 4, (const u8 *)"dvark", 5, - 0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); + safeExecStreaming(fdr.get(), (const u8 *)"aaar", 4, (const u8 *)"dvark", 5, + 0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i] + 4); @@ -371,11 +389,10 @@ TEST_P(FDRp, SmallStreaming) { TEST_P(FDRp, SmallStreaming2) { const u32 hint = GetParam(); SCOPED_TRACE(hint); - vector lits; - lits.push_back(hwlmLiteral("a",1,1)); - lits.push_back(hwlmLiteral("kk",1,2)); - lits.push_back(hwlmLiteral("aardvark", 0,10)); + vector lits = {hwlmLiteral("a", 1, 1), + hwlmLiteral("kk", 1, 2), + hwlmLiteral("aardvark", 0, 10)}; auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); @@ -388,9 +405,9 @@ TEST_P(FDRp, SmallStreaming2) { expected.push_back(match(13,14,2)); 
expected.push_back(match(14,15,2)); - fdrExecStreaming(fdr.get(), (const u8 *)"foobar", 6, - (const u8 *)"aardvarkkk", 10, 0, decentCallback, &matches, - HWLM_ALL_GROUPS, nullptr); + safeExecStreaming(fdr.get(), (const u8 *)"foobar", 6, + (const u8 *)"aardvarkkk", 10, 0, decentCallback, &matches, + HWLM_ALL_GROUPS, nullptr); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i] + 6); @@ -521,9 +538,9 @@ TEST_P(FDRp, Stream1) { // check matches vector matches; - fdrStatus = fdrExecStreaming(fdr.get(), (const u8 *)data1, data_len1, - (const u8 *)data2, data_len2, 0, - decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); + fdrStatus = safeExecStreaming( + fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2, + 0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr); ASSERT_EQ(0, fdrStatus); ASSERT_EQ(4U, matches.size()); @@ -766,9 +783,9 @@ TEST(FDR, FDRTermS) { // check matches vector matches; - fdrStatus = fdrExecStreaming(fdr.get(), (const u8 *)data1, data_len1, - (const u8 *)data2, data_len2, 0, - decentCallbackT, &matches, HWLM_ALL_GROUPS, nullptr); + fdrStatus = safeExecStreaming( + fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2, + 0, decentCallbackT, &matches, HWLM_ALL_GROUPS, nullptr); ASSERT_EQ(HWLM_TERMINATED, fdrStatus); ASSERT_EQ(1U, matches.size()); From ed772380c05996cd71a77e18d6bec2081d49f0e0 Mon Sep 17 00:00:00 2001 From: Mohammad Abdul Awal Date: Thu, 31 Mar 2016 11:28:42 +0100 Subject: [PATCH 196/218] teddy: remove python codegen, refactor code Major cleanup of the Teddy runtime code. Removes python code generation, splits AVX2 models into their own file, improves readability. --- .gitignore | 4 - CMakeLists.txt | 14 +- src/fdr/CMakeLists.txt | 33 - src/fdr/arch.py | 58 -- src/fdr/autogen.py | 118 --- src/fdr/autogen_utils.py | 120 --- src/fdr/engine_description.h | 3 +- src/fdr/fdr.c | 30 +- src/fdr/fdr_engine_description.cpp | 2 +- src/fdr/teddy.c | 738 +++++++++++++---- src/fdr/teddy.h | 108 +++ src/fdr/teddy_autogen.py | 773 ------------------ src/fdr/teddy_avx2.c | 1110 ++++++++++++++++++++++++++ src/fdr/teddy_engine_description.cpp | 27 +- src/fdr/teddy_runtime_common.h | 256 ++++++ 15 files changed, 2111 insertions(+), 1283 deletions(-) delete mode 100644 src/fdr/CMakeLists.txt delete mode 100755 src/fdr/arch.py delete mode 100755 src/fdr/autogen.py delete mode 100755 src/fdr/autogen_utils.py create mode 100644 src/fdr/teddy.h delete mode 100755 src/fdr/teddy_autogen.py create mode 100644 src/fdr/teddy_avx2.c create mode 100644 src/fdr/teddy_runtime_common.h diff --git a/.gitignore b/.gitignore index 6e50ce45a..4d984534a 100644 --- a/.gitignore +++ b/.gitignore @@ -46,10 +46,6 @@ sqlite3 src/config.h src/config.h.in src/hs_version.h -src/fdr/fdr_autogen.c -src/fdr/fdr_autogen_compiler.cpp -src/fdr/teddy_autogen.c -src/fdr/teddy_autogen_compiler.cpp src/parser/Parser.cpp # Generated PCRE files diff --git a/CMakeLists.txt b/CMakeLists.txt index ad7bb3f92..2bc68474f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -357,11 +357,6 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") -# include the autogen targets -add_subdirectory(src/fdr) - -include_directories(${PROJECT_BINARY_DIR}/src/fdr) - if(NOT WIN32) set(RAGEL_C_FLAGS "-Wno-unused") endif() @@ -381,8 +376,6 @@ SET(hs_HEADERS ) install(FILES ${hs_HEADERS} DESTINATION include/hs) -set(fdr_autogen_targets autogen_runtime autogen_teddy_runtime) - set (hs_exec_SRCS 
${hs_HEADERS} src/hs_version.h @@ -400,7 +393,10 @@ set (hs_exec_SRCS src/fdr/flood_runtime.h src/fdr/fdr_loadval.h src/fdr/teddy.c + src/fdr/teddy_avx2.c + src/fdr/teddy.h src/fdr/teddy_internal.h + src/fdr/teddy_runtime_common.h src/hwlm/hwlm.c src/hwlm/hwlm.h src/hwlm/hwlm_internal.h @@ -929,11 +925,9 @@ set (LIB_VERSION ${HS_VERSION}) set (LIB_SOVERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}) add_library(hs_exec OBJECT ${hs_exec_SRCS}) -add_dependencies(hs_exec ${fdr_autogen_targets}) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) -add_dependencies(hs_exec_shared ${fdr_autogen_targets}) set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) endif() @@ -964,7 +958,6 @@ endif() add_library(hs STATIC ${hs_SRCS} $) add_dependencies(hs ragel_Parser) -add_dependencies(hs autogen_teddy_compiler) if (NOT BUILD_SHARED_LIBS) install(TARGETS hs DESTINATION lib) @@ -973,7 +966,6 @@ endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) add_library(hs_shared SHARED ${hs_SRCS} $) add_dependencies(hs_shared ragel_Parser) - add_dependencies(hs_shared autogen_compiler autogen_teddy_compiler) set_target_properties(hs_shared PROPERTIES OUTPUT_NAME hs VERSION ${LIB_VERSION} diff --git a/src/fdr/CMakeLists.txt b/src/fdr/CMakeLists.txt deleted file mode 100644 index 7bbf82ffc..000000000 --- a/src/fdr/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -# The set of rules and other nastiness for generating FDR/Teddy source - -# we need to add these as explicit dependencies -set(AUTOGEN_PY_FILES - arch.py - autogen.py - autogen_utils.py - teddy_autogen.py -) - -function(fdr_autogen type out) - add_custom_command ( - COMMENT "AUTOGEN ${out}" - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${out}" - COMMAND ${PYTHON} "${CMAKE_CURRENT_SOURCE_DIR}/autogen.py" ${type} > "${CMAKE_CURRENT_BINARY_DIR}/${out}" - DEPENDS ${AUTOGEN_PY_FILES} - ) - add_custom_target(autogen_${type} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${out}") -endfunction(fdr_autogen) - -#now build the functions -fdr_autogen(runtime fdr_autogen.c) -fdr_autogen(teddy_runtime teddy_autogen.c) -fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp) - -set(fdr_GENERATED_SRC - ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen.c - ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen.c - ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp - PARENT_SCOPE) - -set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE) -include_directories(${CMAKE_CURRENT_BINARY_DIR}) diff --git a/src/fdr/arch.py b/src/fdr/arch.py deleted file mode 100755 index 83a312542..000000000 --- a/src/fdr/arch.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/python - -# Copyright (c) 2015, Intel Corporation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of Intel Corporation nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import autogen_utils - -# wrapper for architectures - -class Arch: - def __init__(self, name, extensions = []): - self.name = name - self.extensions = extensions - self.target = None - - def get_guard(self): - # these defines definitely fall into the "belt-and-suspenders" - # category of paranoia - if (self.guard_list == []): - return "#if 1" - - return "#if " + " && ".join(self.guard_list) - -class X86Arch(Arch): - def __init__(self, name, extensions = []): - Arch.__init__(self, name, extensions) - self.guard_list = [ ] - self.target = "0" - - if "AVX2" in extensions: - self.target += " | HS_CPU_FEATURES_AVX2" - self.guard_list += [ "defined(__AVX2__)" ] - - -arch_x86_64 = X86Arch("x86_64", extensions = [ ]) -arch_x86_64_avx2 = X86Arch("x86_64_avx2", extensions = [ "AVX2" ]) diff --git a/src/fdr/autogen.py b/src/fdr/autogen.py deleted file mode 100755 index a85104870..000000000 --- a/src/fdr/autogen.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/python - -# Copyright (c) 2015-2016, Intel Corporation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of Intel Corporation nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import sys -from autogen_utils import * -from teddy_autogen import * -from arch import * - -# teddy setup - -def build_teddy_matchers(): - all_matchers = [ ] - - # AVX2 - all_matchers += [ MTFast(arch = arch_x86_64_avx2, packed = False) ] - all_matchers += [ MTFast(arch = arch_x86_64_avx2, packed = True) ] - for n_msk in range(1, 5): - all_matchers += [ MTFat(arch = arch_x86_64_avx2, packed = False, num_masks = n_msk, num_buckets = 16) ] - all_matchers += [ MTFat(arch = arch_x86_64_avx2, packed = True, num_masks = n_msk, num_buckets = 16) ] - - # SSE/SSE2/SSSE3 - for n_msk in range(1, 5): - all_matchers += [ MT(arch = arch_x86_64, packed = False, num_masks = n_msk, num_buckets = 8) ] - all_matchers += [ MT(arch = arch_x86_64, packed = True, num_masks = n_msk, num_buckets = 8) ] - - return all_matchers - -def produce_teddy_compiles(l): - print "void getTeddyDescriptions(vector *out) {" - print " static const TeddyEngineDef defns[] = {" - for m in l: - m.produce_compile_call() - print " };" - print " out->clear();" - print " for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {" - print " out->push_back(TeddyEngineDescription(defns[i]));" - print " }" - print "}" - -# see below - we don't produce our 'zeros' at the point of the teddy runtimes as they -# are linked. So we either generate the function or we don't - then at the point of the -# header in fdr_autogen.c we either generate the header or we #define the zero. - -def produce_teddy_runtimes(l): - # Since we're using -Wmissing-prototypes, we need headers first. - for m in l: - m.produce_guard() - print m.produce_header(visible = True, header_only = True) - m.close_guard() - - for m in l: - m.produce_guard() - m.produce_code() - m.close_guard() - -# see produce_teddy_runtimes() comment for the rationale - -def produce_teddy_headers(l): - for m in l: - m.produce_guard() - print m.produce_header(visible = True, header_only = True) - m.produce_zero_alternative() - -# general utilities - -def make_fdr_function_pointers(matcher_list): - print """ -typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a); -static FDRFUNCTYPE funcs[] = { -""" - all_funcs = " fdr_engine_exec,\n" - all_funcs += ",\n".join([ " %s" % m.get_name() for m in matcher_list ]) - print all_funcs - print """ -}; -""" - -def assign_ids(matcher_list, next_id): - for m in matcher_list: - m.id = next_id - next_id += 1 - return next_id - -# Main entry point - -tm = build_teddy_matchers() -next_id = assign_ids(tm, 1) -if sys.argv[1] == "runtime": - produce_teddy_headers(tm) - make_fdr_function_pointers(tm) -elif sys.argv[1] == "teddy_runtime": - produce_teddy_runtimes(tm) -elif sys.argv[1] == "teddy_compiler": - produce_teddy_compiles(tm) diff --git a/src/fdr/autogen_utils.py b/src/fdr/autogen_utils.py deleted file mode 100755 index 3544bc7b6..000000000 --- a/src/fdr/autogen_utils.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/python - -# Copyright (c) 2015-2016, Intel Corporation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * Neither the name of Intel Corporation nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import sys - -def fail_out(msg = ""): - print >>sys.stderr, "Internal failure in autogen.py: " + msg - sys.exit(1) - -class IntegerType: - def __init__(self, size): - self.size = size - - def get_name(self): - return { 256: "m256", 128 : "m128", 64 : "u64a", 32 : "u32" , 16 : "u16", 8 : "u8"}[self.size] - - def size_in_bytes(self): - return self.size / 8 - - def zero_expression(self): - return "0" - - def constant_to_string(self, n): - if self.size == 64: - suffix = "ULL" - else: - suffix = "" - return "0x%x%s" % (n & ((1 << self.size) - 1), suffix) - - def lowbits(self, n): - return (1 << n) - 1 - - def highbits(self, n): - return ~(self.lowbits(self.size - n)) - - def lowbit_mask(self, n): - return self.constant_to_string(self.lowbits(n)) - - def lowbit_extract_expr(self, expr_string, n): - return "(%s & %s)" % ( expr_string, self.lowbit_mask(n)) - - def flip_lowbits_expr(self, expr_string, n): - return "(%s ^ %s)" % ( expr_string, self.lowbit_mask(n)) - - def bit_extract_expr(self, expr_string, low, high): - lbm = self.lowbit_mask(high - low) - return "((%s >> %d) & %s)" % (expr_string, low, lbm) - - # shifts are +ve if left and -ve if right - def shift_expr(self, expr_string, n): - if n <= -self.size or n >= self.size: - return self.zero_expression() - elif (n > 0): - return "(%s << %d)" % (expr_string, n) - elif (n < 0): - return "(%s >> %d)" % (expr_string, -n) - else: - return "(%s)" % (expr_string) - -class SIMDIntegerType(IntegerType): - def __init__(self, size): - IntegerType.__init__(self, size) - - def zero_expression(self): - return "zeroes128()" - - def lowbit_extract_expr(self, expr_string, n): - if (n <= 32): - tmpType = IntegerType(32) - tmpExpr = "movd(%s)" % expr_string - elif (32 < n <= 64): - tmpType = IntegerType(64) - tmpExpr = "movq(%s)" % expr_string - return tmpType.lowbit_extract_expr(tmpExpr, n) - - def bit_extract_expr(self, expr_string, low, high, flip): - fail_out("Unimplemented bit extract on m128") - - def shift_expr(self, expr_string, n): - if n % 8 != 0: - fail_out("Trying to shift a m128 by a bit granular value") - - # should check that n is divisible by 8 - if n <= -self.size or n >= self.size: - return self.zero_expression() - elif (n > 0): - return "byteShiftLeft128(%s, %s)" % (expr_string, n / 8) - elif (n < 0): - return "byteShiftRight128(%s, %s)" % (expr_string, -n / 8) - else: - return "(%s)" % (expr_string) - - def lowbit_mask(self, n): - if n % 8 != 0: - fail_out("Trying to make a lowbit mask in a m128 by a bit granular value") - return self.shift_expr("ones128()", -(128 - n)) 
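The `lowbit_extract_expr` helper deleted above generated the scalar-extraction code that the handwritten `CONFIRM_TEDDY` macro (in teddy.c, below) now spells out directly: a 128-bit match-result vector is split into 64-bit (or 32-bit) lanes so the set bits can be walked with scalar bit-scan instructions. A minimal standalone sketch of that idiom for x86-64, using raw SSE2 intrinsics and a GCC/Clang builtin rather than the project's `simd_utils` wrappers (`movq`, `byteShiftRight128`, `findAndClearLSB_64`), which are assumed here to wrap the same operations:

#include <emmintrin.h>   /* SSE2 */
#include <stdint.h>
#include <stdio.h>

/* Split a 128-bit match-result vector into two 64-bit lanes and walk the
 * set bits, as CONFIRM_TEDDY does on ARCH_64_BIT targets. The real code
 * hands each nonzero lane to a confirm function; printing here is purely
 * illustrative. */
static void walk_teddy_result(__m128i var) {
    uint64_t lane[2];
    lane[0] = (uint64_t)_mm_cvtsi128_si64(var);                    /* movq */
    lane[1] = (uint64_t)_mm_cvtsi128_si64(_mm_srli_si128(var, 8)); /* byteShiftRight128 + movq */
    for (int p = 0; p < 2; p++) {
        uint64_t v = lane[p];
        while (v) {
            unsigned bit = (unsigned)__builtin_ctzll(v); /* findAndClearLSB_64 */
            v &= v - 1;
            /* bit / num_buckets gives the byte offset within the lane,
             * bit % num_buckets gives the bucket to confirm. */
            printf("candidate: lane %d bit %u\n", p, bit);
        }
    }
}
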
diff --git a/src/fdr/engine_description.h b/src/fdr/engine_description.h index 3c3026c39..09b161796 100644 --- a/src/fdr/engine_description.h +++ b/src/fdr/engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -55,6 +55,7 @@ class EngineDescription { u32 getNumBuckets() const { return numBuckets; } u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; } u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; } + void setConfirmTopLevelSplit(u32 split) { confirmTopLevelSplit = split; } bool isValidOnTarget(const target_t &target_in) const; virtual u32 getDefaultFloodSuffixLength() const = 0; diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index bd7dbe83d..51a041cc9 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -33,6 +33,7 @@ #include "fdr_loadval.h" #include "fdr_streaming_runtime.h" #include "flood_runtime.h" +#include "teddy.h" #include "teddy_internal.h" #include "util/simd_utils.h" #include "util/simd_utils_ssse3.h" @@ -764,7 +765,34 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr, return HWLM_SUCCESS; } -#include "fdr_autogen.c" +#if defined(__AVX2__) +#define ONLY_AVX2(func) func +#else +#define ONLY_AVX2(func) NULL +#endif + +typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a); +static const FDRFUNCTYPE funcs[] = { + fdr_engine_exec, + ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast), + ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fast), + ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fat), + ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fat), + ONLY_AVX2(fdr_exec_teddy_avx2_msks2_fat), + ONLY_AVX2(fdr_exec_teddy_avx2_msks2_pck_fat), + ONLY_AVX2(fdr_exec_teddy_avx2_msks3_fat), + ONLY_AVX2(fdr_exec_teddy_avx2_msks3_pck_fat), + ONLY_AVX2(fdr_exec_teddy_avx2_msks4_fat), + ONLY_AVX2(fdr_exec_teddy_avx2_msks4_pck_fat), + fdr_exec_teddy_msks1, + fdr_exec_teddy_msks1_pck, + fdr_exec_teddy_msks2, + fdr_exec_teddy_msks2_pck, + fdr_exec_teddy_msks3, + fdr_exec_teddy_msks3_pck, + fdr_exec_teddy_msks4, + fdr_exec_teddy_msks4_pck, +}; #define FAKE_HISTORY_SIZE 16 static const u8 fake_history[FAKE_HISTORY_SIZE]; diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp index 103bc2146..5e923b08f 100644 --- a/src/fdr/fdr_engine_description.cpp +++ b/src/fdr/fdr_engine_description.cpp @@ -57,7 +57,7 @@ u32 FDREngineDescription::getDefaultFloodSuffixLength() const { void getFdrDescriptions(vector *out) { static const FDREngineDef def = {0, 128, 8, 0, 1, 256}; out->clear(); - out->push_back(FDREngineDescription(def)); + out->emplace_back(def); } static diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index 11df9d699..08b761c0a 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,11 +26,19 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "config.h" +/** \file + * \brief Teddy literal matcher: SSSE3 engine runtime. 
+ */ + +#include "fdr_internal.h" +#include "flood_runtime.h" +#include "teddy.h" +#include "teddy_internal.h" +#include "teddy_runtime_common.h" #include "util/simd_utils.h" #include "util/simd_utils_ssse3.h" -static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = { +const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = { {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -67,178 +75,584 @@ static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} }; -// Note: p_mask is an output param that initialises a poison mask. -UNUSED static really_inline -m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, - const u8 *buf_history, size_t len_history, - const u32 nMasks) { - union { - u8 val8[16]; - m128 val128; - } u; - u.val128 = zeroes128(); - - if (ptr >= lo) { - u32 avail = (u32)(hi - ptr); - if (avail >= 16) { - *p_mask = load128((const void*)(p_mask_arr[16] + 16)); - return loadu128(ptr); - } - *p_mask = load128((const void*)(p_mask_arr[avail] + 16)); - for (u32 i = 0; i < avail; i++) { - u.val8[i] = ptr[i]; - } - } else { - u32 need = MIN((u32)(lo - ptr), MIN(len_history, nMasks - 1)); - u32 start = (u32)(lo - ptr); - u32 i; - for (i = start - need; ptr + i < lo; i++) { - u.val8[i] = buf_history[len_history - (lo - (ptr + i))]; - } - u32 end = MIN(16, (u32)(hi - ptr)); - *p_mask = loadu128((const void*)(p_mask_arr[end - start] + 16 - start)); - for (; i < end; i++) { - u.val8[i] = ptr[i]; - } - } - - return u.val128; +#ifdef ARCH_64_BIT +#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(isnonzero128(var))) { \ + u64a lo = movq(var); \ + u64a hi = movq(byteShiftRight128(var, 8)); \ + if (unlikely(lo)) { \ + conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \ + control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(hi)) { \ + conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \ + control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + } \ +} while (0); +#else +#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(isnonzero128(var))) { \ + u32 part1 = movd(var); \ + u32 part2 = movd(byteShiftRight128(var, 4)); \ + u32 part3 = movd(byteShiftRight128(var, 8)); \ + u32 part4 = movd(byteShiftRight128(var, 12)); \ + if (unlikely(part1)) { \ + conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ + control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part2)) { \ + conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ + control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part3)) { \ + conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ + control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part4)) { \ + conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ + control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + } \ +} while (0); +#endif + +static really_inline +m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) { + m128 mask = set16x8(0xf); + m128 lo = and128(val, mask); + m128 hi = and128(rshift2x64(val, 4), mask); + return and128(and128(pshufb(maskBase[0*2], lo), + 
pshufb(maskBase[0*2+1], hi)), p_mask); } +static really_inline +m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask, + m128 val) { + m128 mask = set16x8(0xf); + m128 lo = and128(val, mask); + m128 hi = and128(rshift2x64(val, 4), mask); + m128 r = prep_conf_teddy_m1(maskBase, p_mask, val); -#if defined(__AVX2__) + m128 res_1 = and128(pshufb(maskBase[1*2], lo), + pshufb(maskBase[1*2+1], hi)); + m128 res_shifted_1 = palignr(res_1, *old_1, 16-1); + *old_1 = res_1; + return and128(and128(r, p_mask), res_shifted_1); +} + +static really_inline +m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, + m128 p_mask, m128 val) { + m128 mask = set16x8(0xf); + m128 lo = and128(val, mask); + m128 hi = and128(rshift2x64(val, 4), mask); + m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val); -UNUSED static really_inline -m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, - const u8 *buf_history, size_t len_history, - const u32 nMasks) { - m128 p_mask128; - m256 ret = set2x128(vectoredLoad128(&p_mask128, ptr, lo, hi, buf_history, len_history, nMasks)); - *p_mask = set2x128(p_mask128); - return ret; + m128 res_2 = and128(pshufb(maskBase[2*2], lo), + pshufb(maskBase[2*2+1], hi)); + m128 res_shifted_2 = palignr(res_2, *old_2, 16-2); + *old_2 = res_2; + return and128(r, res_shifted_2); } -static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} -}; +static really_inline +m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, + m128 *old_3, m128 p_mask, m128 val) { + m128 mask = set16x8(0xf); + m128 lo = and128(val, mask); + m128 hi = and128(rshift2x64(val, 4), mask); + m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val); + + m128 res_3 = and128(pshufb(maskBase[3*2], lo), + pshufb(maskBase[3*2+1], hi)); + m128 res_shifted_3 = palignr(res_3, *old_3, 16-3); + *old_3 = res_3; + return and128(r, res_shifted_3); +} + +hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = getConfBase(teddy, 1); -UNUSED static really_inline -m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, - const u8 *buf_history, size_t len_history) { - union { - u8 val8[32]; - m256 val256; - } u; - - if (ptr >= lo) { - u32 avail = (u32)(hi - ptr); - if (avail >= 32) { - *p_mask = load256((const void*)(p_mask_arr256[32] + 32)); - return loadu256(ptr); - } - *p_mask = load256((const void*)(p_mask_arr256[avail] + 32)); - for (u32 i = 0; i < avail; i++) { - u.val8[i] = ptr[i]; - } - } else { - // need contains "how many chars to pull from history" - // calculate based on what we need, what we have in the buffer - // and only what we need to make primary confirm work - u32 start = (u32)(lo - ptr); - u32 i; - for (i = start; ptr + i < lo; i++) { - u.val8[i] = buf_history[len_history - (lo - (ptr + i))]; - } - u32 end = MIN(32, (u32)(hi - ptr)); - *p_mask = loadu256((const void*)(p_mask_arr256[end - start] + 32 - start)); - for (; i < end; i++) { - u.val8[i] = ptr[i]; - } - } - - return u.val256; + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 1); + m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); + ptr += 16; + } + + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy); + m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16)); + CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 1); + m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); + } + *a->groups = controlVal; + return 
HWLM_SUCCESS; } +hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); -#endif // __AVX2__ + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = getConfBase(teddy, 1); -#define P0(cnd) unlikely(cnd) + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 1); + m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } -#include "fdr.h" -#include "fdr_internal.h" -#include "flood_runtime.h" + if (ptr + 16 < buf_end) { + m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } -#include "fdr_confirm.h" -#include "fdr_confirm_runtime.h" + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); + m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16)); + CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); + } -#include "fdr_loadval.h" -#include "util/bitutils.h" -#include "teddy_internal.h" + for (; ptr < buf_end; ptr += 16) { + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 1); + m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = getConfBase(teddy, 2); + + m128 res_old_1 = ones128(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 2); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), + load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + 
+ for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), + load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); + m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), + load128(ptr + 16)); + CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 2); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = getConfBase(teddy, 2); + + m128 res_old_1 = ones128(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 2); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), + load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), + load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); + m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), + load128(ptr + 16)); + CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); + } -#include "teddy_autogen.c" + for (; ptr < buf_end; ptr += 16) { + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 2); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = getConfBase(teddy, 3); + + m128 res_old_1 = ones128(); + 
m128 res_old_2 = ones128(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 3); + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); + m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones128(), load128(ptr + 16)); + CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 3); + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = getConfBase(teddy, 3); + + m128 res_old_1 = ones128(); + m128 res_old_2 = ones128(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 3); + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); + m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones128(), load128(ptr + 16)); + CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 3); + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, + p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 
0, VECTORING, do_confWithBit_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = getConfBase(teddy, 4); + + m128 res_old_1 = ones128(); + m128 res_old_2 = ones128(); + m128 res_old_3 = ones128(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 4); + m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); + m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones128(), load128(ptr + 16)); + CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 4); + m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = getConfBase(teddy, 4); + + m128 res_old_1 = ones128(); + m128 res_old_2 = ones128(); + m128 res_old_3 = ones128(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 4); + m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + ptr += 
16; + } + + if (ptr + 16 < buf_end) { + m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones128(), load128(ptr)); + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); + m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones128(), load128(ptr + 16)); + CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m128 p_mask; + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 4); + m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, p_mask, val_0); + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} diff --git a/src/fdr/teddy.h b/src/fdr/teddy.h new file mode 100644 index 000000000..a0377f60b --- /dev/null +++ b/src/fdr/teddy.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Teddy literal matcher: function declarations. 
+ */ + +#ifndef TEDDY_H_ +#define TEDDY_H_ + +#include "hwlm/hwlm.h" + +struct FDR; // forward declaration from fdr_internal.h +struct FDR_Runtime_Args; + +hwlm_error_t fdr_exec_s1_w128(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_s2_w128(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_s4_w128(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +#if defined(__AVX2__) + +hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, + const struct FDR_Runtime_Args *a); + +#endif /* __AVX2__ */ + +#endif /* TEDDY_H_ */ diff --git a/src/fdr/teddy_autogen.py b/src/fdr/teddy_autogen.py deleted file mode 100755 index 1cada00c6..000000000 --- a/src/fdr/teddy_autogen.py +++ /dev/null @@ -1,773 +0,0 @@ -#!/usr/bin/python - -# Copyright (c) 2015-2016, Intel Corporation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of Intel Corporation nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import sys -from autogen_utils import * -from string import Template - -class MT: - def produce_header(self, visible, header_only = False): - s = "" - if not visible: - s += "static never_inline" - s += """ -hwlm_error_t %s(UNUSED const struct FDR *fdr, - UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name() - if header_only: - s += ";" - else: - s += "{" - s += "\n" - return s - - def produce_guard(self): - print self.arch.get_guard() - - def produce_zero_alternative(self): - print """ -#else -#define %s 0 -#endif -""" % self.get_name() - - def close_guard(self): - print "#endif" - - def produce_confirm_base(self, conf_var_name, conf_var_size, offset, cautious, enable_confirmless, do_bailout = False): - if cautious: - caution_string = "VECTORING" - else: - caution_string = "NOT_CAUTIOUS" - conf_split_mask = IntegerType(32).constant_to_string( - self.conf_top_level_split - 1) - if enable_confirmless: - quick_check_string = """ - if (!fdrc->mult) { - u32 id = fdrc->nBitsOrSoleID; - if ((last_match == id) && (fdrc->flags & NoRepeat)) - continue; - last_match = id; - controlVal = a->cb(ptr+byte-buf, ptr+byte-buf, id, a->ctxt); - continue; - } """ - else: - quick_check_string = "" - if do_bailout: - bailout_string = """ - if ((ptr + byte < buf + a->start_offset) || (ptr + byte >= buf + len)) continue;""" - else: - bailout_string = "" - - return Template(""" -if (P0(!!$CONFVAR)) { - do { - u32 bit = findAndClearLSB_$CONFVAR_SIZE(&$CONFVAR); - u32 byte = bit / $NUM_BUCKETS + $OFFSET; - u32 bitRem = bit % $NUM_BUCKETS; - $BAILOUT_STRING - u32 confSplit = *(ptr+byte) & $SPLIT_MASK; - u32 idx = confSplit * $NUM_BUCKETS + bitRem; - u32 cf = confBase[idx]; - if (!cf) - continue; - fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf); - if (!(fdrc->groups & *control)) - continue; - $QUICK_CHECK_STRING - CautionReason reason = $CAUTION_STRING; - CONF_TYPE v; - const u8 * confirm_loc = ptr + byte - $CONF_PULL_BACK - 7; - if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { - v = lv_u64a(confirm_loc, buf, buf + len); - } else { // r == VECTORING, confirm_loc < buf - u64a histBytes = a->histBytes; - v = lv_u64a_ce(confirm_loc, buf, buf + len); - // stitch together v (which doesn't move) and history (which does) - u32 overhang = buf - confirm_loc; - histBytes >>= 64 - (overhang * 8); - v |= histBytes; - } - confWithBit(fdrc, a, ptr - buf + byte, $CONF_PULL_BACK, control, &last_match, v); - } while(P0(!!$CONFVAR)); - if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { - *a->groups = controlVal; - return HWLM_TERMINATED; - } -}""").substitute(CONFVAR = conf_var_name, - CONFVAR_SIZE = conf_var_size, - NUM_BUCKETS = self.num_buckets, - OFFSET = offset, - SPLIT_MASK = conf_split_mask, - QUICK_CHECK_STRING = 
quick_check_string, - BAILOUT_STRING = bailout_string, - CAUTION_STRING = caution_string, - CONF_PULL_BACK = self.conf_pull_back) - - def produce_confirm(self, iter, var_name, offset, bits, cautious = True): - if self.packed: - print self.produce_confirm_base(var_name, bits, iter*16 + offset, cautious, enable_confirmless = False, do_bailout = False) - else: - if cautious: - caution_string = "VECTORING" - else: - caution_string = "NOT_CAUTIOUS" - - print " if (P0(!!%s)) {" % var_name - print " do {" - if bits == 64: - print " u32 bit = findAndClearLSB_64(&%s);" % (var_name) - else: - print " u32 bit = findAndClearLSB_32(&%s);" % (var_name) - print " u32 byte = bit / %d + %d;" % (self.num_buckets, iter*16 + offset) - print " u32 idx = bit %% %d;" % self.num_buckets - print " u32 cf = confBase[idx];" - print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);" - print " if (!(fdrc->groups & *control))" - print " continue;" - print """ - CautionReason reason = %s; - CONF_TYPE v; - const u8 * confirm_loc = ptr + byte - 7; - if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { - v = lv_u64a(confirm_loc, buf, buf + len); - } else { // r == VECTORING, confirm_loc < buf - u64a histBytes = a->histBytes; - v = lv_u64a_ce(confirm_loc, buf, buf + len); - // stitch together v (which doesn't move) and history (which does) - u32 overhang = buf - confirm_loc; - histBytes >>= 64 - (overhang * 8); - v |= histBytes; - }""" % (caution_string) - if self.num_masks == 1: - print " confWithBit1(fdrc, a, ptr - buf + byte, control, &last_match, v);" - else: - print " confWithBitMany(fdrc, a, ptr - buf + byte, %s, control, &last_match, v);" % (caution_string) - print " } while(P0(!!%s));" % var_name - print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {" - print " *a->groups = controlVal;" - print " return HWLM_TERMINATED;" - print " }" - print " }" - - def produce_needed_temporaries(self, max_iterations): - print " m128 p_mask;" - for iter in range(0, max_iterations): - print " m128 val_%d;" % iter - print " m128 val_%d_lo;" % iter - print " m128 val_%d_hi;" % iter - for x in range(self.num_masks): - print " m128 res_%d_%d;" % (iter, x) - if x != 0: - print " m128 res_shifted_%d_%d;" % (iter, x) - print " m128 r_%d;" % iter - print "#ifdef ARCH_64_BIT" - print " u64a r_%d_lopart;" % iter - print " u64a r_%d_hipart;" % iter - print "#else" - print " u32 r_%d_part1;" % iter - print " u32 r_%d_part2;" % iter - print " u32 r_%d_part3;" % iter - print " u32 r_%d_part4;" % iter - print "#endif" - - def produce_one_iteration_state_calc(self, iter, effective_num_iterations, - cautious, save_old): - if cautious: - print " val_%d = vectoredLoad128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks) - else: - print " val_%d = load128(ptr + %d);" % (iter, iter*16) - print " val_%d_lo = and128(val_%d, lomask);" % (iter, iter) - print " val_%d_hi = rshift2x64(val_%d, 4);" % (iter, iter) - print " val_%d_hi = and128(val_%d_hi, lomask);" % (iter, iter) - print - for x in range(self.num_masks): - print Template(""" - res_${ITER}_${X} = and128(pshufb(maskBase[${X}*2] , val_${ITER}_lo), - pshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x) - if x != 0: - if iter == 0: - print " res_shifted_%d_%d = palignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x) - else: - print " res_shifted_%d_%d = palignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x) - if x != 0 and iter == effective_num_iterations - 1 and 
save_old: - print " res_old_%d = res_%d_%d;" % (x, iter, x) - print - if cautious: - print " r_%d = and128(res_%d_0, p_mask);" % (iter, iter) - else: - print " r_%d = res_%d_0;" % (iter, iter) - for x in range(1, self.num_masks): - print " r_%d = and128(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x) - print - - def produce_one_iteration_confirm(self, iter, confirmCautious): - setup64 = [ (0, "r_%d_lopart" % iter, "movq(r_%d)" % iter), - (8, "r_%d_hipart" % iter, "movq(byteShiftRight128(r_%d, 8))" % iter) ] - - setup32 = [ (0, "r_%d_part1" % iter, "movd(r_%d)" % iter), - (4, "r_%d_part2" % iter, "movd(byteShiftRight128(r_%d, 4))" % iter), - (8, "r_%d_part3" % iter, "movd(byteShiftRight128(r_%d, 8))" % iter), - (12, "r_%d_part4" % iter, "movd(byteShiftRight128(r_%d, 12))" % iter) ] - - print " if (P0(isnonzero128(r_%d))) {" % (iter) - print "#ifdef ARCH_64_BIT" - for (off, val, init) in setup64: - print " %s = %s;" % (val, init) - for (off, val, init) in setup64: - self.produce_confirm(iter, val, off, 64, cautious = confirmCautious) - print "#else" - for (off, val, init) in setup32: - print " %s = %s;" % (val, init) - for (off, val, init) in setup32: - self.produce_confirm(iter, val, off, 32, cautious = confirmCautious) - print "#endif" - print " }" - - def produce_one_iteration(self, iter, effective_num_iterations, cautious = False, - confirmCautious = True, save_old = True): - self.produce_one_iteration_state_calc(iter, effective_num_iterations, cautious, save_old) - self.produce_one_iteration_confirm(iter, confirmCautious) - - def produce_code(self): - print self.produce_header(visible = True, header_only = False) - print """ - const u8 * buf = a->buf; - const size_t len = a->len; - const u8 * ptr = buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t * control = &controlVal; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 * tryFloodDetect = a->firstFloodDetect; - const struct FDRConfirm *fdrc; - u32 last_match = (u32)-1; -""" - print - - self.produce_needed_temporaries(self.num_iterations) - print - - print " const struct Teddy * teddy = (const struct Teddy *)fdr;" - print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));" - print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32));" % self.num_masks - print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);" - print " const size_t iterBytes = %d;" % (self.num_iterations * 16) - - print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \ - ' buf, len, a->start_offset);' - print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \ - ' mainStart);' - - for x in range(self.num_masks): - if (x != 0): - print " m128 res_old_%d = ones128();" % x - print " m128 lomask = set16x8(0xf);" - - print " if (ptr < mainStart) {" - print " ptr = mainStart - 16;" - self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True) - print " ptr += 16;" - print " }" - - print " if (ptr + 16 < buf + len) {" - self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True) - print " ptr += 16;" - print " }" - - print """ - for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - if (P0(ptr > tryFloodDetect)) { - tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes); - if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { - *a->groups = controlVal; - return HWLM_TERMINATED; - } - } -""" - for iter in 
range(self.num_iterations): - self.produce_one_iteration(iter, self.num_iterations, cautious = False, confirmCautious = False) - - print " }" - - print " for (; ptr < buf + len; ptr += 16) {" - self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True) - print " }" - - print """ - *a->groups = controlVal; - return HWLM_SUCCESS; -} -""" - - def produce_compile_call(self): - packed_str = { False : "false", True : "true"}[self.packed] - print " { %d, %s, %d, %d, %s, %d, %d }," % ( - self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str, - self.conf_pull_back, self.conf_top_level_split) - - def get_name(self): - if self.packed: - pck_string = "_pck" - else: - pck_string = "" - - if self.num_buckets == 16: - type_string = "_fat" - else: - type_string = "" - - return "fdr_exec_teddy_%s_msks%d%s%s" % (self.arch.name, self.num_masks, pck_string, type_string) - - def __init__(self, arch, packed = False, num_masks = 1, num_buckets = 8): - self.arch = arch - self.packed = packed - self.num_masks = num_masks - self.num_buckets = num_buckets - self.num_iterations = 2 - - if packed: - self.conf_top_level_split = 32 - else: - self.conf_top_level_split = 1 - self.conf_pull_back = 0 - -class MTFat(MT): - def produce_needed_temporaries(self, max_iterations): - print " m256 p_mask;" - for iter in range(0, max_iterations): - print " m256 val_%d;" % iter - print " m256 val_%d_lo;" % iter - print " m256 val_%d_hi;" % iter - for x in range(self.num_masks): - print " m256 res_%d_%d;" % (iter, x) - if x != 0: - print " m256 res_shifted_%d_%d;" % (iter, x) - print " m256 r_%d;" % iter - print "#ifdef ARCH_64_BIT" - print " u64a r_%d_part1;" % iter - print " u64a r_%d_part2;" % iter - print " u64a r_%d_part3;" % iter - print " u64a r_%d_part4;" % iter - print "#else" - print " u32 r_%d_part1;" % iter - print " u32 r_%d_part2;" % iter - print " u32 r_%d_part3;" % iter - print " u32 r_%d_part4;" % iter - print " u32 r_%d_part5;" % iter - print " u32 r_%d_part6;" % iter - print " u32 r_%d_part7;" % iter - print " u32 r_%d_part8;" % iter - print "#endif" - - def produce_code(self): - print self.produce_header(visible = True, header_only = False) - print """ - const u8 * buf = a->buf; - const size_t len = a->len; - const u8 * ptr = buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t * control = &controlVal; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 * tryFloodDetect = a->firstFloodDetect; - const struct FDRConfirm *fdrc; - u32 last_match = (u32)-1; -""" - print - - self.produce_needed_temporaries(self.num_iterations) - print - - print " const struct Teddy * teddy = (const struct Teddy *)fdr;" - print " const m256 * maskBase = (const m256 *)((const u8 *)fdr + sizeof(struct Teddy));" - print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32*2));" % self.num_masks - print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);" - print " const size_t iterBytes = %d;" % (self.num_iterations * 16) - - print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \ - ' buf, len, a->start_offset);' - print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \ - ' mainStart);' - - for x in range(self.num_masks): - if (x != 0): - print " m256 res_old_%d = ones256();" % x - print " m256 lomask = set32x8(0xf);" - - print " if (ptr < mainStart) {" - print " ptr = mainStart - 16;" - self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True) - print " ptr += 16;" - print " }" - - 
print " if (ptr + 16 < buf + len) {" - self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True) - print " ptr += 16;" - print " }" - - print """ - for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - if (P0(ptr > tryFloodDetect)) { - tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes); - if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { - *a->groups = controlVal; - return HWLM_TERMINATED; - } - } -""" - - for iter in range(self.num_iterations): - self.produce_one_iteration(iter, self.num_iterations, False, confirmCautious = False) - - print " }" - - print " for (; ptr < buf + len; ptr += 16) {" - self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True) - print " }" - - print """ - *a->groups = controlVal; - return HWLM_SUCCESS; -} -""" - - def produce_one_iteration_state_calc(self, iter, effective_num_iterations, - cautious, save_old): - if cautious: - print " val_%d = vectoredLoad2x128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks) - else: - print " val_%d = load2x128(ptr + %d);" % (iter, iter*16) - print " val_%d_lo = and256(val_%d, lomask);" % (iter, iter) - print " val_%d_hi = rshift4x64(val_%d, 4);" % (iter, iter) - print " val_%d_hi = and256(val_%d_hi, lomask);" % (iter, iter) - print - for x in range(self.num_masks): - print Template(""" - res_${ITER}_${X} = and256(vpshufb(maskBase[${X}*2] , val_${ITER}_lo), - vpshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x) - if x != 0: - if iter == 0: - print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x) - else: - print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x) - if x != 0 and iter == effective_num_iterations - 1 and save_old: - print " res_old_%d = res_%d_%d;" % (x, iter, x) - print - if cautious: - print " r_%d = and256(res_%d_0, p_mask);" % (iter, iter) - else: - print " r_%d = res_%d_0;" % (iter, iter) - for x in range(1, self.num_masks): - print " r_%d = and256(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x) - print - - def produce_one_iteration_confirm(self, iter, confirmCautious): - setup64 = [ (0, "r_%d_part1" % iter, "extractlow64from256(r)"), - (4, "r_%d_part2" % iter, "extract64from256(r, 1);\n r = interleave256hi(r_%d, r_swap)" % (iter)), - (8, "r_%d_part3" % iter, "extractlow64from256(r)"), - (12, "r_%d_part4" % iter, "extract64from256(r, 1)") ] - - setup32 = [ (0, "r_%d_part1" % iter, "extractlow32from256(r)"), - (2, "r_%d_part2" % iter, "extract32from256(r, 1)"), - (4, "r_%d_part3" % iter, "extract32from256(r, 2)"), - (6, "r_%d_part4" % iter, "extract32from256(r, 3);\n r = interleave256hi(r_%d, r_swap)" % (iter)), - (8, "r_%d_part5" % iter, "extractlow32from256(r)"), - (10, "r_%d_part6" % iter, "extract32from256(r, 1)"), - (12, "r_%d_part7" % iter, "extract32from256(r, 2)"), - (14, "r_%d_part8" % iter, "extract32from256(r, 3)") ] - - print " if (P0(isnonzero256(r_%d))) {" % (iter) - print " m256 r_swap = swap128in256(r_%d);" % (iter) - print " m256 r = interleave256lo(r_%d, r_swap);" % (iter) - print "#ifdef ARCH_64_BIT" - for (off, val, init) in setup64: - print " %s = %s;" % (val, init) - - for (off, val, init) in setup64: - self.produce_confirm(iter, val, off, 64, cautious = confirmCautious) - print "#else" - for (off, val, init) in setup32: - print " %s = %s;" % (val, init) - - 
for (off, val, init) in setup32: - self.produce_confirm(iter, val, off, 32, cautious = confirmCautious) - print "#endif" - print " }" - -class MTFast: - def produce_header(self, visible, header_only = False): - s = "" - if not visible: - s += "static never_inline" - s += """ -hwlm_error_t %s(UNUSED const struct FDR *fdr, - UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name() - if header_only: - s += ";" - else: - s += "{" - s += "\n" - return s - - def produce_guard(self): - print self.arch.get_guard() - - def produce_zero_alternative(self): - print """ -#else -#define %s 0 -#endif -""" % self.get_name() - - def close_guard(self): - print "#endif" - - def produce_confirm(self, cautious): - if cautious: - cautious_str = "VECTORING" - else: - cautious_str = "NOT_CAUTIOUS" - - print " for (u32 i = 0; i < arrCnt; i++) {" - print " u32 byte = bitArr[i] / 8;" - if self.packed: - conf_split_mask = IntegerType(32).constant_to_string( - self.conf_top_level_split - 1) - print " u32 bitRem = bitArr[i] % 8;" - print " u32 confSplit = *(ptr+byte) & 0x1f;" - print " u32 idx = confSplit * %d + bitRem;" % self.num_buckets - print " u32 cf = confBase[idx];" - print " if (!cf)" - print " continue;" - print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);" - print " if (!(fdrc->groups & *control))" - print " continue;" - print """ - CautionReason reason = %s; - CONF_TYPE v; - const u8 * confirm_loc = ptr + byte - 7; - if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { - v = lv_u64a(confirm_loc, buf, buf + len); - } else { // r == VECTORING, confirm_loc < buf - u64a histBytes = a->histBytes; - v = lv_u64a_ce(confirm_loc, buf, buf + len); - // stitch together v (which doesn't move) and history (which does) - u32 overhang = buf - confirm_loc; - histBytes >>= 64 - (overhang * 8); - v |= histBytes; - }""" % (cautious_str) - print " confWithBit(fdrc, a, ptr - buf + byte, 0, control, &last_match, v);" - else: - print " u32 cf = confBase[bitArr[i] % 8];" - print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);" - print """ - CautionReason reason = %s; - CONF_TYPE v; - const u8 * confirm_loc = ptr + byte - 7; - if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { - v = lv_u64a(confirm_loc, buf, buf + len); - } else { // r == VECTORING, confirm_loc < buf - u64a histBytes = a->histBytes; - v = lv_u64a_ce(confirm_loc, buf, buf + len); - // stitch together v (which doesn't move) and history (which does) - u32 overhang = buf - confirm_loc; - histBytes >>= 64 - (overhang * 8); - v |= histBytes; - }""" % (cautious_str) - print " confWithBit1(fdrc, a, ptr - buf + byte, control, &last_match, v);" - print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {" - print " *a->groups = controlVal;" - print " return HWLM_TERMINATED;" - print " }" - print " }" - - def produce_needed_temporaries(self, max_iterations): - print " u32 arrCnt;" - print " u16 bitArr[512];" - print " m256 p_mask;" - print " m256 val_0;" - print " m256 val_0_lo;" - print " m256 val_0_hi;" - print " m256 res_0;" - print " m256 res_1;" - print " m128 lo_part;" - print " m128 hi_part;" - print "#ifdef ARCH_64_BIT" - print " u64a r_0_part;" - print "#else" - print " u32 r_0_part;" - print "#endif" - - def produce_bit_scan(self, offset, bits): - print " while (P0(!!r_0_part)) {" - if bits == 64: - print " bitArr[arrCnt++] = (u16)findAndClearLSB_64(&r_0_part) + 64 * %d;" % (offset) - else: - print " bitArr[arrCnt++] = (u16)findAndClearLSB_32(&r_0_part) + 32 * %d;" % (offset) - print " }" - - def 
produce_bit_check_128(self, var_name, offset): - print " if (P0(isnonzero128(%s))) {" % (var_name) - print "#ifdef ARCH_64_BIT" - print " r_0_part = movq(%s);" % (var_name) - self.produce_bit_scan(offset, 64) - print " r_0_part = movq(byteShiftRight128(%s, 8));" % (var_name) - self.produce_bit_scan(offset + 1, 64) - print "#else" - print " r_0_part = movd(%s);" % (var_name) - self.produce_bit_scan(offset * 2, 32) - for step in range(1, 4): - print " r_0_part = movd(byteShiftRight128(%s, %d));" % (var_name, step * 4) - self.produce_bit_scan(offset * 2 + step, 32) - print "#endif" - print " }" - - def produce_bit_check_256(self, iter, single_iter, cautious): - print " if (P0(isnonzero256(res_%d))) {" % (iter) - if single_iter: - print " arrCnt = 0;" - print " lo_part = cast256to128(res_%d);" % (iter) - print " hi_part = cast256to128(swap128in256(res_%d));" % (iter) - self.produce_bit_check_128("lo_part", iter * 4) - self.produce_bit_check_128("hi_part", iter * 4 + 2) - if single_iter: - self.produce_confirm(cautious) - print " }" - - def produce_one_iteration_state_calc(self, iter, cautious): - if cautious: - print " val_0 = vectoredLoad256(&p_mask, ptr + %d, buf+a->start_offset, buf+len, a->buf_history, a->len_history);" % (iter * 32) - else: - print " val_0 = load256(ptr + %d);" % (iter * 32) - print " val_0_lo = and256(val_0, lomask);" - print " val_0_hi = rshift4x64(val_0, 4);" - print " val_0_hi = and256(val_0_hi, lomask);" - print " res_%d = and256(vpshufb(maskLo , val_0_lo), vpshufb(maskHi, val_0_hi));" % (iter) - if cautious: - print " res_%d = and256(res_%d, p_mask);" % (iter, iter) - - def produce_code(self): - print self.produce_header(visible = True, header_only = False) - print """ - const u8 * buf = a->buf; - const size_t len = a->len; - const u8 * ptr = buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t * control = &controlVal; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 * tryFloodDetect = a->firstFloodDetect; - const struct FDRConfirm *fdrc; - u32 last_match = (u32)-1; -""" - print - - self.produce_needed_temporaries(self.num_iterations) - - print " const struct Teddy * teddy = (const struct Teddy *)fdr;" - print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));" - print " const m256 maskLo = set2x128(maskBase[0]);" - print " const m256 maskHi = set2x128(maskBase[1]);" - print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + 32);" - print " const u8 * mainStart = ROUNDUP_PTR(ptr, 32);" - print " const size_t iterBytes = %d;" % (self.num_iterations * 32) - - print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \ - ' buf, len, a->start_offset);' - print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \ - ' mainStart);' - print " const m256 lomask = set32x8(0xf);" - - print " if (ptr < mainStart) {" - print " ptr = mainStart - 32;" - self.produce_one_iteration_state_calc(iter = 0, cautious = True) - self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True) - print " ptr += 32;" - print " }" - - print " if (ptr + 32 < buf + len) {" - self.produce_one_iteration_state_calc(iter = 0, cautious = False) - self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True) - print " ptr += 32;" - print " }" - print """ - for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - if (P0(ptr > tryFloodDetect)) { - tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, 
iterBytes); - if (P0(controlVal == HWLM_TERMINATE_MATCHING)) { - *a->groups = controlVal; - return HWLM_TERMINATED; - } - } -""" - - for iter in range (0, self.num_iterations): - self.produce_one_iteration_state_calc(iter = iter, cautious = False) - print " arrCnt = 0;" - for iter in range (0, self.num_iterations): - self.produce_bit_check_256(iter = iter, single_iter = False, cautious = False) - self.produce_confirm(cautious = False) - print " }" - - print " for (; ptr < buf + len; ptr += 32) {" - self.produce_one_iteration_state_calc(iter = 0, cautious = True) - self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True) - print " }" - - print """ - *a->groups = controlVal; - return HWLM_SUCCESS; -} -""" - - def get_name(self): - if self.packed: - pck_string = "_pck" - else: - pck_string = "" - return "fdr_exec_teddy_%s_msks%d%s_fast" % (self.arch.name, self.num_masks, pck_string) - - def produce_compile_call(self): - packed_str = { False : "false", True : "true"}[self.packed] - print " { %d, %s, %d, %d, %s, %d, %d }," % ( - self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str, - self.conf_pull_back, self.conf_top_level_split) - - def __init__(self, arch, packed = False): - self.arch = arch - self.packed = packed - self.num_masks = 1 - self.num_buckets = 8 - self.num_iterations = 2 - - self.conf_top_level_split = 1 - self.conf_pull_back = 0 - if packed: - self.conf_top_level_split = 32 - else: - self.conf_top_level_split = 1 - self.conf_pull_back = 0 diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c new file mode 100644 index 000000000..33dd8a30d --- /dev/null +++ b/src/fdr/teddy_avx2.c @@ -0,0 +1,1110 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Teddy literal matcher: AVX2 engine runtime. 
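+ *
+ * Two families of engines live here. The "fat" variants broadcast each
+ * 16-byte block of input across both 128-bit lanes of a 256-bit vector
+ * (vectoredLoad2x128), doubling the number of shuffle buckets from 8 to 16
+ * while still consuming 16 bytes per step. The "fast" variants keep a
+ * single mask and 8 buckets but consume a full 32 bytes of input per
+ * shuffle (vectoredLoad256), gathering candidate match positions into a
+ * bit array before running confirmation.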
+ */ + +#include "fdr_internal.h" +#include "flood_runtime.h" +#include "teddy.h" +#include "teddy_internal.h" +#include "teddy_runtime_common.h" +#include "util/simd_utils.h" +#include "util/simd_utils_ssse3.h" + +#if defined(__AVX2__) + +static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = { + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} +}; + +#ifdef ARCH_64_BIT +#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(isnonzero256(var))) { \ + m256 swap = swap128in256(var); \ + m256 r = interleave256lo(var, swap); \ + u64a part1 = extractlow64from256(r); \ + u64a part2 = extract64from256(r, 1); \ + r = interleave256hi(var, swap); \ + u64a part3 = extractlow64from256(r); \ + u64a part4 = extract64from256(r, 1); \ + if (unlikely(part1)) { \ + conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ + control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part2)) { \ + conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ + control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part3)) { \ + conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ + control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part4)) { \ + conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ + control, &last_match); \ + 
CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+    }                                                                     \
+} while (0);
+#else
+#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn)           \
+do {                                                                      \
+    if (unlikely(isnonzero256(var))) {                                    \
+        m256 swap = swap128in256(var);                                    \
+        m256 r = interleave256lo(var, swap);                              \
+        u32 part1 = extractlow32from256(r);                               \
+        u32 part2 = extract32from256(r, 1);                               \
+        u32 part3 = extract32from256(r, 2);                               \
+        u32 part4 = extract32from256(r, 3);                               \
+        r = interleave256hi(var, swap);                                   \
+        u32 part5 = extractlow32from256(r);                               \
+        u32 part6 = extract32from256(r, 1);                               \
+        u32 part7 = extract32from256(r, 2);                               \
+        u32 part8 = extract32from256(r, 3);                               \
+        if (unlikely(part1)) {                                            \
+            conf_fn(&part1, bucket, offset, confBase, reason, a, ptr,     \
+                    control, &last_match);                                \
+            CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+        if (unlikely(part2)) {                                            \
+            conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \
+                    control, &last_match);                                \
+            CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+        if (unlikely(part3)) {                                            \
+            conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \
+                    control, &last_match);                                \
+            CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+        if (unlikely(part4)) {                                            \
+            conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \
+                    control, &last_match);                                \
+            CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+        if (unlikely(part5)) {                                            \
+            conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \
+                    control, &last_match);                                \
+            CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+        if (unlikely(part6)) {                                            \
+            conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr,\
+                    control, &last_match);                                \
+            CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+        if (unlikely(part7)) {                                            \
+            conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr,\
+                    control, &last_match);                                \
+            CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+        if (unlikely(part8)) {                                            \
+            conf_fn(&part8, bucket, offset + 14, confBase, reason, a, ptr,\
+                    control, &last_match);                                \
+            CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+    }                                                                     \
+} while (0);
+#endif
+
+#define CONFIRM_FAST_TEDDY(var, offset, reason, conf_fn)                  \
+do {                                                                      \
+    if (unlikely(isnonzero256(var))) {                                    \
+        u32 arrCnt = 0;                                                   \
+        m128 lo = cast256to128(var);                                      \
+        m128 hi = cast256to128(swap128in256(var));                        \
+        bit_array_fast_teddy(lo, bitArr, &arrCnt, offset);                \
+        bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2);            \
+        for (u32 i = 0; i < arrCnt; i++) {                                \
+            conf_fn(bitArr[i], confBase, reason, a, ptr, control,         \
+                    &last_match);                                         \
+            CHECK_HWLM_TERMINATE_MATCHING;                                \
+        }                                                                 \
+    }                                                                     \
+} while (0);
+
+static really_inline
+m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
+                       const u8 *buf_history, size_t len_history,
+                       const u32 nMasks) {
+    m128 p_mask128;
+    m256 ret = set2x128(vectoredLoad128(&p_mask128, ptr, lo, hi, buf_history,
+                                        len_history, nMasks));
+    *p_mask = set2x128(p_mask128);
+    return ret;
+}
+
+/*
+ * \brief Copy a block of [0,31] bytes efficiently.
+ *
+ * This function is a workaround intended to stop some compilers from
+ * synthesizing a memcpy function call out of the copy of a small number of
+ * bytes that we do in vectoredLoad256.
+ */
+static really_inline
+void copyRuntBlock256(u8 *dst, const u8 *src, size_t len) {
+    switch (len) {
+    case 0:
+        break;
+    case 1:
+        *dst = *src;
+        break;
+    case 2:
+        unaligned_store_u16(dst, unaligned_load_u16(src));
+        break;
+    case 3:
+        unaligned_store_u16(dst, unaligned_load_u16(src));
+        dst[2] = src[2];
+        break;
+    case 4:
+        unaligned_store_u32(dst, unaligned_load_u32(src));
+        break;
+    case 5:
+    case 6:
+    case 7:
+        /* Perform copy with two overlapping 4-byte chunks.
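+           For example, with len == 6 the first store writes src[2..5] to
+           dst + 2 and the second writes src[0..3] to dst; the middle two
+           bytes are written twice, which is harmless and cheaper than a
+           byte-by-byte loop.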
*/ + unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4)); + unaligned_store_u32(dst, unaligned_load_u32(src)); + break; + case 8: + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + /* Perform copy with two overlapping 8-byte chunks. */ + unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8)); + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + case 16: + storeu128(dst, loadu128(src)); + break; + default: + /* Perform copy with two overlapping 16-byte chunks. */ + assert(len < 32); + storeu128(dst + len - 16, loadu128(src + len - 16)); + storeu128(dst, loadu128(src)); + break; + } +} + +static really_inline +m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, + const u8 *buf_history, size_t len_history) { + union { + u8 val8[32]; + m256 val256; + } u; + + uintptr_t copy_start; + uintptr_t copy_len; + + if (ptr >= lo) { + uintptr_t avail = (uintptr_t)(hi - ptr); + if (avail >= 32) { + *p_mask = load256(p_mask_arr256[32] + 32); + return loadu256(ptr); + } + *p_mask = load256(p_mask_arr256[avail] + 32); + copy_start = 0; + copy_len = avail; + } else { + // need contains "how many chars to pull from history" + // calculate based on what we need, what we have in the buffer + // and only what we need to make primary confirm work + uintptr_t start = (uintptr_t)(lo - ptr); + uintptr_t i; + for (i = start; ptr + i < lo; i++) { + u.val8[i] = buf_history[len_history - (lo - (ptr + i))]; + } + uintptr_t end = MIN(32, (uintptr_t)(hi - ptr)); + *p_mask = loadu256(p_mask_arr256[end - start] + 32 - start); + copy_start = i; + copy_len = end - i; + } + + // Runt block from the buffer. + copyRuntBlock256(&u.val8[copy_start], &ptr[copy_start], copy_len); + + return u.val256; +} + +static really_inline +void do_confWithBit1_fast_teddy(u16 bits, const u32 *confBase, + CautionReason reason, + const struct FDR_Runtime_Args *a, + const u8 *ptr, hwlmcb_rv_t *control, + u32 *last_match) { + u32 byte = bits / 8; + u32 cf = confBase[bits % 8]; + const struct FDRConfirm *fdrc = (const struct FDRConfirm *) + ((const u8 *)confBase + cf); + u64a confVal = getConfVal(a, ptr, byte, reason); + confWithBit1(fdrc, a, ptr - a->buf + byte, control, last_match, confVal); +} + +static really_inline +void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase, + CautionReason reason, + const struct FDR_Runtime_Args *a, const u8 *ptr, + hwlmcb_rv_t *control, u32 *last_match) { + u32 byte = bits / 8; + u32 bitRem = bits % 8; + u32 confSplit = *(ptr+byte) & 0x1f; + u32 idx = confSplit * 8 + bitRem; + u32 cf = confBase[idx]; + if (!cf) { + return; + } + const struct FDRConfirm *fdrc = (const struct FDRConfirm *) + ((const u8 *)confBase + cf); + if (!(fdrc->groups & *control)) { + return; + } + u64a confVal = getConfVal(a, ptr, byte, reason); + confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, last_match, confVal); +} + +static really_inline +void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) { + if (unlikely(isnonzero128(var))) { +#ifdef ARCH_64_BIT + u64a part_0 = movq(var); + while (unlikely(part_0)) { + bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_0) + + 64 * (offset); + *arrCnt += 1; + } + u64a part_1 = movq(byteShiftRight128(var, 8)); + while (unlikely(part_1)) { + bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + + 64 * (offset + 1); + *arrCnt += 1; + } +#else + u32 part_0 = movd(var); + while (unlikely(part_0)) { + 
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_0) + + 32 * (offset * 2); + *arrCnt += 1; + } + u32 part_1 = movd(byteShiftRight128(var, 4)); + while (unlikely(part_1)) { + bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + + 32 * (offset * 2 + 1); + *arrCnt += 1; + } + u32 part_2 = movd(byteShiftRight128(var, 8)); + while (unlikely(part_2)) { + bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) + + 32 * (offset * 2 + 2); + *arrCnt += 1; + } + u32 part_3 = movd(byteShiftRight128(var, 12)); + while (unlikely(part_3)) { + bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) + + 32 * (offset * 2 + 3); + *arrCnt += 1; + } +#endif + } +} + +static really_inline +m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 p_mask, m256 val) { + m256 mask = set32x8(0xf); + m256 lo = and256(val, mask); + m256 hi = and256(rshift4x64(val, 4), mask); + return and256(and256(vpshufb(maskBase[0*2], lo), + vpshufb(maskBase[0*2+1], hi)), p_mask); +} + +static really_inline +m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 p_mask, + m256 val) { + m256 mask = set32x8(0xf); + m256 lo = and256(val, mask); + m256 hi = and256(rshift4x64(val, 4), mask); + m256 r = prep_conf_fat_teddy_m1(maskBase, p_mask, val); + + m256 res_1 = and256(vpshufb(maskBase[1*2], lo), + vpshufb(maskBase[1*2+1], hi)); + m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1); + *old_1 = res_1; + return and256(and256(r, p_mask), res_shifted_1); +} + +static really_inline +m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2, + m256 p_mask, m256 val) { + m256 mask = set32x8(0xf); + m256 lo = and256(val, mask); + m256 hi = and256(rshift4x64(val, 4), mask); + m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, p_mask, val); + + m256 res_2 = and256(vpshufb(maskBase[2*2], lo), + vpshufb(maskBase[2*2+1], hi)); + m256 res_shifted_2 = vpalignr(res_2, *old_2, 16-2); + *old_2 = res_2; + return and256(r, res_shifted_2); +} + +static really_inline +m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, + m256 *old_3, m256 p_mask, m256 val) { + m256 mask = set32x8(0xf); + m256 lo = and256(val, mask); + m256 hi = and256(rshift4x64(val, 4), mask); + m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, p_mask, val); + + m256 res_3 = and256(vpshufb(maskBase[3*2], lo), + vpshufb(maskBase[3*2+1], hi)); + m256 res_shifted_3 = vpalignr(res_3, *old_3, 16-3); + *old_3 = res_3; + return and256(r, res_shifted_3); +} + +static really_inline +m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi, + m256 p_mask) { + m256 lo = and256(val, mask); + m256 hi = and256(rshift4x64(val, 4), mask); + m256 res = and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi)); + return and256(res, p_mask); +} + +static really_inline +const m256 * getMaskBase_avx2(const struct Teddy *teddy) { + return (const m256 *)((const u8 *)teddy + sizeof(struct Teddy)); +} + +static really_inline +const u32 * getConfBase_avx2(const struct Teddy *teddy, u8 numMask) { + return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + + (numMask*32*2)); +} + +hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t 
iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m256 *maskBase = getMaskBase_avx2(teddy); + const u32 *confBase = getConfBase_avx2(teddy, 1); + + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 1); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); + ptr += 16; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit1_teddy); + m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(), + load2x128(ptr + 16)); + CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit1_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 1); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m256 *maskBase = getMaskBase_avx2(teddy); + const u32 *confBase = getConfBase_avx2(teddy, 1); + + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 1); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); + m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(), + load2x128(ptr + 16)); + CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 1); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, 
VECTORING, do_confWithBit_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m256 *maskBase = getMaskBase_avx2(teddy); + const u32 *confBase = getConfBase_avx2(teddy, 2); + + m256 res_old_1 = ones256(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 2); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), + load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), + load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); + m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), + load2x128(ptr + 16)); + CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 2); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m256 *maskBase = getMaskBase_avx2(teddy); + const u32 *confBase = getConfBase_avx2(teddy, 2); + + m256 res_old_1 = ones256(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 2); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), + load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, 
VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), + load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); + m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), + load2x128(ptr + 16)); + CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 2); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m256 *maskBase = getMaskBase_avx2(teddy); + const u32 *confBase = getConfBase_avx2(teddy, 3); + + m256 res_old_1 = ones256(); + m256 res_old_2 = ones256(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 3); + m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones256(), load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones256(), load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); + m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones256(), load2x128(ptr + 16)); + CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 3); + m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const 
size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m256 *maskBase = getMaskBase_avx2(teddy); + const u32 *confBase = getConfBase_avx2(teddy, 3); + + m256 res_old_1 = ones256(); + m256 res_old_2 = ones256(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 3); + m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones256(), load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones256(), load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); + m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + ones256(), load2x128(ptr + 16)); + CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 3); + m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, + p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m256 *maskBase = getMaskBase_avx2(teddy); + const u32 *confBase = getConfBase_avx2(teddy, 4); + + m256 res_old_1 = ones256(); + m256 res_old_2 = ones256(); + m256 res_old_3 = ones256(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 4); + m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones256(), + load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); + ptr += 16; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones256(), + load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); + m256 
r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones256(), + load2x128(ptr + 16)); + CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 4); + m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 32; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m256 *maskBase = getMaskBase_avx2(teddy); + const u32 *confBase = getConfBase_avx2(teddy, 4); + + m256 res_old_1 = ones256(); + m256 res_old_2 = ones256(); + m256 res_old_3 = ones256(); + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 16; + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 4); + m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + if (ptr + 16 < buf_end) { + m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones256(), + load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + ptr += 16; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones256(), + load2x128(ptr)); + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); + m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, ones256(), + load2x128(ptr + 16)); + CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); + } + + for (; ptr < buf_end; ptr += 16) { + m256 p_mask; + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, + a->buf_history, a->len_history, 4); + m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, + &res_old_3, p_mask, val_0); + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 64; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = 
getConfBase(teddy, 1); + + const m256 maskLo = set2x128(maskBase[0]); + const m256 maskHi = set2x128(maskBase[1]); + const m256 mask = set32x8(0xf); + u16 bitArr[512]; + + const u8 *mainStart = ROUNDUP_PTR(ptr, 32); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 32; + m256 p_mask; + m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, + buf_end, a->buf_history, a->len_history); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, + p_mask); + CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); + ptr += 32; + } + + if (ptr + 32 < buf_end) { + m256 val_0 = load256(ptr + 0); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, + ones256()); + CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); + ptr += 32; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + + m256 val_0 = load256(ptr + 0); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, + ones256()); + CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); + + m256 val_1 = load256(ptr + 32); + m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi, + ones256()); + CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); + } + + for (; ptr < buf_end; ptr += 32) { + m256 p_mask; + m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, + buf_end, a->buf_history, a->len_history); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, + p_mask); + CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, + const struct FDR_Runtime_Args *a) { + const u8 *buf_end = a->buf + a->len; + const u8 *ptr = a->buf + a->start_offset; + hwlmcb_rv_t controlVal = *a->groups; + hwlmcb_rv_t *control = &controlVal; + u32 floodBackoff = FLOOD_BACKOFF_START; + const u8 *tryFloodDetect = a->firstFloodDetect; + u32 last_match = (u32)-1; + const struct Teddy *teddy = (const struct Teddy *)fdr; + const size_t iterBytes = 64; + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", + a->buf, a->len, a->start_offset); + + const m128 *maskBase = getMaskBase(teddy); + const u32 *confBase = getConfBase(teddy, 1); + + const m256 maskLo = set2x128(maskBase[0]); + const m256 maskHi = set2x128(maskBase[1]); + const m256 mask = set32x8(0xf); + u16 bitArr[512]; + + const u8 *mainStart = ROUNDUP_PTR(ptr, 32); + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); + if (ptr < mainStart) { + ptr = mainStart - 32; + m256 p_mask; + m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, + buf_end, a->buf_history, a->len_history); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, + p_mask); + CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); + ptr += 32; + } + + if (ptr + 32 < buf_end) { + m256 val_0 = load256(ptr + 0); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, + ones256()); + CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); + ptr += 32; + } + + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { + __builtin_prefetch(ptr + (iterBytes*4)); + CHECK_FLOOD; + + m256 val_0 = load256(ptr + 0); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, + ones256()); + CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, 
do_confWithBit_fast_teddy); + + m256 val_1 = load256(ptr + 32); + m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi, + ones256()); + CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy); + } + + for (; ptr < buf_end; ptr += 32) { + m256 p_mask; + m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, + buf_end, a->buf_history, a->len_history); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, + p_mask); + CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); + } + *a->groups = controlVal; + return HWLM_SUCCESS; +} + +#endif // __AVX2__ diff --git a/src/fdr/teddy_engine_description.cpp b/src/fdr/teddy_engine_description.cpp index ead448a8f..d95f4937a 100644 --- a/src/fdr/teddy_engine_description.cpp +++ b/src/fdr/teddy_engine_description.cpp @@ -64,7 +64,32 @@ bool TeddyEngineDescription::needConfirm(const vector &lits) const return false; } -#include "teddy_autogen_compiler.cpp" +void getTeddyDescriptions(vector *out) { + static const TeddyEngineDef defns[] = { + { 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false, 0, 1 }, + { 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true, 0, 32 }, + { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false, 0, 1 }, + { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true, 0, 32 }, + { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false, 0, 1 }, + { 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true, 0, 32 }, + { 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false, 0, 1 }, + { 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true, 0, 32 }, + { 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false, 0, 1 }, + { 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true, 0, 32 }, + { 11, 0, 1, 8, false, 0, 1 }, + { 12, 0, 1, 8, true, 0, 32 }, + { 13, 0, 2, 8, false, 0, 1 }, + { 14, 0, 2, 8, true, 0, 32 }, + { 15, 0, 3, 8, false, 0, 1 }, + { 16, 0, 3, 8, true, 0, 32 }, + { 17, 0, 4, 8, false, 0, 1 }, + { 18, 0, 4, 8, true, 0, 32 }, + }; + out->clear(); + for (const auto &def : defns) { + out->emplace_back(def); + } +} static size_t maxFloodTailLen(const vector &vl) { diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h new file mode 100644 index 000000000..c50b4d16d --- /dev/null +++ b/src/fdr/teddy_runtime_common.h @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Teddy literal matcher: common runtime procedures. + */ + +#ifndef TEDDY_RUNTIME_COMMON_H_ +#define TEDDY_RUNTIME_COMMON_H_ + +#include "fdr_confirm.h" +#include "fdr_confirm_runtime.h" +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32]; + +#ifdef ARCH_64_BIT +#define TEDDY_CONF_TYPE u64a +#define TEDDY_FIND_AND_CLEAR_LSB(conf) findAndClearLSB_64(conf) +#else +#define TEDDY_CONF_TYPE u32 +#define TEDDY_FIND_AND_CLEAR_LSB(conf) findAndClearLSB_32(conf) +#endif + +#define CHECK_HWLM_TERMINATE_MATCHING \ +do { \ + if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \ + *a->groups = controlVal; \ + return HWLM_TERMINATED; \ + } \ +} while (0); + +#define CHECK_FLOOD \ +do { \ + if (unlikely(ptr > tryFloodDetect)) { \ + tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \ + &floodBackoff, &controlVal, \ + iterBytes); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ +} while (0); + +/* + * \brief Copy a block of [0,15] bytes efficiently. + * + * This function is a workaround intended to stop some compilers from + * synthesizing a memcpy function call out of the copy of a small number of + * bytes that we do in vectoredLoad128. + */ +static really_inline +void copyRuntBlock128(u8 *dst, const u8 *src, size_t len) { + switch (len) { + case 0: + break; + case 1: + *dst = *src; + break; + case 2: + unaligned_store_u16(dst, unaligned_load_u16(src)); + break; + case 3: + unaligned_store_u16(dst, unaligned_load_u16(src)); + dst[2] = src[2]; + break; + case 4: + unaligned_store_u32(dst, unaligned_load_u32(src)); + break; + case 5: + case 6: + case 7: + /* Perform copy with two overlapping 4-byte chunks. */ + unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4)); + unaligned_store_u32(dst, unaligned_load_u32(src)); + break; + case 8: + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + default: + /* Perform copy with two overlapping 8-byte chunks. */ + assert(len < 16); + unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8)); + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + } +} + +// Note: p_mask is an output param that initialises a poison mask. 
+static really_inline +m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, + const u8 *buf_history, size_t len_history, + const u32 nMasks) { + union { + u8 val8[16]; + m128 val128; + } u; + u.val128 = zeroes128(); + + uintptr_t copy_start; + uintptr_t copy_len; + + if (ptr >= lo) { + uintptr_t avail = (uintptr_t)(hi - ptr); + if (avail >= 16) { + *p_mask = load128(p_mask_arr[16] + 16); + return loadu128(ptr); + } + *p_mask = load128(p_mask_arr[avail] + 16); + copy_start = 0; + copy_len = avail; + } else { + uintptr_t need = MIN((uintptr_t)(lo - ptr), + MIN(len_history, nMasks - 1)); + uintptr_t start = (uintptr_t)(lo - ptr); + uintptr_t i; + for (i = start - need; ptr + i < lo; i++) { + u.val8[i] = buf_history[len_history - (lo - (ptr + i))]; + } + uintptr_t end = MIN(16, (uintptr_t)(hi - ptr)); + *p_mask = loadu128(p_mask_arr[end - start] + 16 - start); + copy_start = i; + copy_len = end - i; + } + + // Runt block from the buffer. + copyRuntBlock128(&u.val8[copy_start], &ptr[copy_start], copy_len); + + return u.val128; +} + +static really_inline +u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte, + CautionReason reason) { + u64a confVal = 0; + const u8 *buf = a->buf; + size_t len = a->len; + const u8 *confirm_loc = ptr + byte - 7; + if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { + confVal = lv_u64a(confirm_loc, buf, buf + len); + } else { // r == VECTORING, confirm_loc < buf + u64a histBytes = a->histBytes; + confVal = lv_u64a_ce(confirm_loc, buf, buf + len); + // stitch together confVal and history + u32 overhang = buf - confirm_loc; + histBytes >>= 64 - (overhang * 8); + confVal |= histBytes; + } + return confVal; +} + +static really_inline +void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, + const u32 *confBase, CautionReason reason, + const struct FDR_Runtime_Args *a, const u8 *ptr, + hwlmcb_rv_t *control, u32 *last_match) { + do { + u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf); + u32 byte = bit / bucket + offset; + u32 bitRem = bit % bucket; + u32 confSplit = *(ptr+byte) & 0x1f; + u32 idx = confSplit * bucket + bitRem; + u32 cf = confBase[idx]; + if (!cf) { + continue; + } + const struct FDRConfirm *fdrc = (const struct FDRConfirm *) + ((const u8 *)confBase + cf); + if (!(fdrc->groups & *control)) { + continue; + } + u64a confVal = getConfVal(a, ptr, byte, reason); + confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, + last_match, confVal); + } while (unlikely(*conf)); +} + +static really_inline +void do_confWithBit1_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, + const u32 *confBase, CautionReason reason, + const struct FDR_Runtime_Args *a, const u8 *ptr, + hwlmcb_rv_t *control, u32 *last_match) { + do { + u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf); + u32 byte = bit / bucket + offset; + u32 idx = bit % bucket; + u32 cf = confBase[idx]; + const struct FDRConfirm *fdrc = (const struct FDRConfirm *) + ((const u8 *)confBase + cf); + if (!(fdrc->groups & *control)) { + continue; + } + u64a confVal = getConfVal(a, ptr, byte, reason); + confWithBit1(fdrc, a, ptr - a->buf + byte, control, last_match, + confVal); + } while (unlikely(*conf)); +} + +static really_inline +void do_confWithBitMany_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, + const u32 *confBase, CautionReason reason, + const struct FDR_Runtime_Args *a, const u8 *ptr, + hwlmcb_rv_t *control, u32 *last_match) { + do { + u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf); + u32 byte = bit / bucket + offset; + u32 idx = bit % bucket; + u32 cf = 
confBase[idx]; + const struct FDRConfirm *fdrc = (const struct FDRConfirm *) + ((const u8 *)confBase + cf); + if (!(fdrc->groups & *control)) { + continue; + } + u64a confVal = getConfVal(a, ptr, byte, reason); + confWithBitMany(fdrc, a, ptr - a->buf + byte, reason, control, + last_match, confVal); + } while (unlikely(*conf)); +} + +static really_inline +const m128 * getMaskBase(const struct Teddy *teddy) { + return (const m128 *)((const u8 *)teddy + sizeof(struct Teddy)); +} + +static really_inline +const u32 * getConfBase(const struct Teddy *teddy, u8 numMask) { + return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + + (numMask*32)); +} + +#endif /* TEDDY_RUNTIME_COMMON_H_ */ From f48b8c937b275622b1fb1ecaaa5260459a8a4195 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 9 May 2016 16:03:39 +1000 Subject: [PATCH 197/218] ucp_table: don't always deref rv of lower_bound Fixes a warning from asan. --- src/parser/ucp_table.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/parser/ucp_table.cpp b/src/parser/ucp_table.cpp index 7b53d1d6a..a6cb57cdd 100644 --- a/src/parser/ucp_table.cpp +++ b/src/parser/ucp_table.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -117,13 +117,12 @@ void make_caseless(CodePointSet *cps) { bool flip_case(unichar *c) { assert(c); - const unicase *const uc_begin = ucp_caseless_def; - const unicase *const uc_end = - ucp_caseless_def + ARRAY_LENGTH(ucp_caseless_def); - const unicase test = { *c, 0 }; - const unicase *f = lower_bound(uc_begin, uc_end, test); - if (f->base == *c) { + + const auto uc_begin = begin(ucp_caseless_def); + const auto uc_end = end(ucp_caseless_def); + const auto f = lower_bound(uc_begin, uc_end, test); + if (f != uc_end && f->base == *c) { DEBUG_PRINTF("flipped c=%x to %x\n", *c, f->caseless); *c = f->caseless; return true; From 97eaea043e31d3566aef1cf0f51d3f0da6a81474 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 9 May 2016 16:10:57 +1000 Subject: [PATCH 198/218] ucp_table: clean up make_caseless --- src/parser/ucp_table.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/parser/ucp_table.cpp b/src/parser/ucp_table.cpp index a6cb57cdd..fc1330fe7 100644 --- a/src/parser/ucp_table.cpp +++ b/src/parser/ucp_table.cpp @@ -83,14 +83,13 @@ void make_caseless(CodePointSet *cps) { CodePointSet base = *cps; - const unicase *uc_begin = ucp_caseless_def; - const unicase *const uc_end = ucp_caseless_def - + ARRAY_LENGTH(ucp_caseless_def); - DEBUG_PRINTF("uc len %zd\n", uc_end - uc_begin); + auto uc_begin = begin(ucp_caseless_def); + auto uc_end = end(ucp_caseless_def); + DEBUG_PRINTF("uc len %zd\n", distance(uc_begin, uc_end)); - for (auto it = base.begin(), ite = base.end(); it != ite; ++it) { - unichar b = lower(*it); - unichar e = upper(*it) + 1; + for (const auto &elem : base) { + unichar b = lower(elem); + unichar e = upper(elem) + 1; for (; b < e; b++) { DEBUG_PRINTF("decasing %x\n", b); @@ -101,7 +100,7 @@ void make_caseless(CodePointSet *cps) { DEBUG_PRINTF("EOL\n"); return; } - while (uc_begin->base == b) { + while (uc_begin != uc_end && uc_begin->base == b) { DEBUG_PRINTF("at {%x,%x}\n", uc_begin->base, uc_begin->caseless); cps->set(uc_begin->caseless); ++uc_begin; From 95f35aad0e3372e3fcc5cd2bf0b734c0f6af0c0d Mon Sep 17 00:00:00 2001 
From: Justin Viiret Date: Tue, 10 May 2016 09:32:13 +1000 Subject: [PATCH 199/218] fdr: ensure 16 bytes of pre-history in block mode --- src/fdr/fdr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 51a041cc9..ff69853e2 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -800,12 +800,16 @@ static const u8 fake_history[FAKE_HISTORY_SIZE]; hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, hwlm_group_t groups) { + // We guarantee (for safezone construction) that it is safe to read 16 + // bytes before the end of the history buffer. + const u8 *hbuf = fake_history + FAKE_HISTORY_SIZE; + const struct FDR_Runtime_Args a = { buf, len, - fake_history, + hbuf, 0, - fake_history, // nocase + hbuf, // nocase 0, start, cb, From 0e8867b3938bd11bf94aaafdc40aa8ae29a38d50 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 10 May 2016 13:14:25 +1000 Subject: [PATCH 200/218] Combine struct elements to avoid inheritance The multiple inheritance here was slightly overkill, and it also appears to trigger a MSVC bug when writing through the pointer to member. --- src/nfagraph/ng_depth.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/nfagraph/ng_depth.h b/src/nfagraph/ng_depth.h index 38a98a1c6..16231ea1e 100644 --- a/src/nfagraph/ng_depth.h +++ b/src/nfagraph/ng_depth.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -64,7 +64,11 @@ struct NFAVertexRevDepth { /** * \brief Encapsulates min/max depths relative to all of our special vertices. 
*/ -struct NFAVertexBidiDepth : NFAVertexDepth, NFAVertexRevDepth { +struct NFAVertexBidiDepth { + DepthMinMax fromStart; + DepthMinMax fromStartDotStar; + DepthMinMax toAccept; + DepthMinMax toAcceptEod; }; /** From ba77229c029dd71852623c2ada3b182cbac28aa6 Mon Sep 17 00:00:00 2001 From: Boris Nagaev Date: Wed, 18 May 2016 07:59:57 +0200 Subject: [PATCH 201/218] install .dll to bin/, not to lib/ --- CMakeLists.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bc68474f..5af4ba7a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -951,7 +951,10 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) OUTPUT_NAME hs_runtime MACOSX_RPATH ON LINKER_LANGUAGE C) - install(TARGETS hs_runtime_shared DESTINATION lib) + install(TARGETS hs_runtime_shared + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) endif() # we want the static lib for testing @@ -971,7 +974,10 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) VERSION ${LIB_VERSION} SOVERSION ${LIB_SOVERSION} MACOSX_RPATH ON) -install(TARGETS hs_shared DESTINATION lib) +install(TARGETS hs_shared + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) endif() if(NOT WIN32) From 74d5d0e81993987cd24e8bd93a2e0563be0e3a1c Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 18 May 2016 15:50:38 +1000 Subject: [PATCH 202/218] cmake: fix optimise define --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5af4ba7a4..10a7b359c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,7 +115,9 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) endif() #for config -set(HS_OPTIMIZE OPTIMISE) +if (OPTIMISE) + set(HS_OPTIMIZE ON) +endif() CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF) From e52783017eeb6093a8bac160d6125d658664eebb Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 27 Apr 2016 15:05:44 +1000 Subject: [PATCH 203/218] pkgconfig: Add Libs.private for static linking --- CMakeLists.txt | 8 ++++++++ libhs.pc.in | 1 + 2 files changed, 9 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 10a7b359c..7439f50b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -349,6 +349,14 @@ configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h) configure_file(src/hs_version.h.in ${PROJECT_BINARY_DIR}/hs_version.h) if (NOT WIN32) + # expand out library names for pkgconfig static link info + foreach (LIB ${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES}) + # this is fragile, but protects us from toolchain specific files + if (NOT EXISTS ${LIB}) + set(PRIVATE_LIBS "${PRIVATE_LIBS} -l${LIB}") + endif() + endforeach() + configure_file(libhs.pc.in libhs.pc @ONLY) # only replace @ quoted vars install(FILES ${CMAKE_BINARY_DIR}/libhs.pc DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/pkgconfig") diff --git a/libhs.pc.in b/libhs.pc.in index b67f9de6b..e16460e88 100644 --- a/libhs.pc.in +++ b/libhs.pc.in @@ -7,4 +7,5 @@ Name: libhs Description: Intel(R) Hyperscan Library Version: @HS_VERSION@ Libs: -L${libdir} -lhs +Libs.private: @PRIVATE_LIBS@ Cflags: -I${includedir}/hs From 2c762909feb345dba0484391e4c16486c34d7421 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 20 May 2016 15:11:07 +1000 Subject: [PATCH 204/218] fdr: remove dead function decls from teddy.h --- src/fdr/teddy.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/fdr/teddy.h b/src/fdr/teddy.h index 
a0377f60b..e1ee90ea3 100644 --- a/src/fdr/teddy.h +++ b/src/fdr/teddy.h @@ -38,15 +38,6 @@ struct FDR; // forward declaration from fdr_internal.h struct FDR_Runtime_Args; -hwlm_error_t fdr_exec_s1_w128(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); - -hwlm_error_t fdr_exec_s2_w128(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); - -hwlm_error_t fdr_exec_s4_w128(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); - hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, const struct FDR_Runtime_Args *a); From 142e74e8e61112237b139f6f28a3d23acf6bbad1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 20 May 2016 15:14:20 +1000 Subject: [PATCH 205/218] fdr: teddy.h does not need to include hwlm.h --- src/fdr/teddy.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/fdr/teddy.h b/src/fdr/teddy.h index e1ee90ea3..f39027237 100644 --- a/src/fdr/teddy.h +++ b/src/fdr/teddy.h @@ -33,8 +33,6 @@ #ifndef TEDDY_H_ #define TEDDY_H_ -#include "hwlm/hwlm.h" - struct FDR; // forward declaration from fdr_internal.h struct FDR_Runtime_Args; From 52346397369120f16d559a5b782aa70a4d243313 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 20 May 2016 15:30:29 +1000 Subject: [PATCH 206/218] popcount: use intrinsics and restructure defines --- src/util/popcount.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/util/popcount.h b/src/util/popcount.h index 510a449a1..d882a6720 100644 --- a/src/util/popcount.h +++ b/src/util/popcount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,21 +38,17 @@ // We have a native popcount where the compiler has defined __POPCNT__. #if defined(__POPCNT__) #define HAVE_POPCOUNT_INSTR -#endif - -#if defined(_WIN32) && defined(__AVX__) // TODO: fix win preproc +#elif defined(_WIN32) && defined(__AVX__) // TODO: fix win preproc #define HAVE_POPCOUNT_INSTR -#define __builtin_popcount __popcnt -#define __builtin_popcountll __popcnt64 #endif static really_inline u32 popcount32(u32 x) { #if defined(HAVE_POPCOUNT_INSTR) // Single-instruction builtin. - return (u32)__builtin_popcount(x); + return _mm_popcnt_u32(x); #else - // Fast branch-free version from bit-twiddling hacks as most Intel + // Fast branch-free version from bit-twiddling hacks as older Intel // processors do not have a POPCNT instruction. x -= (x >> 1) & 0x55555555; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); @@ -62,16 +58,18 @@ u32 popcount32(u32 x) { static really_inline u32 popcount64(u64a x) { -#if defined(HAVE_POPCOUNT_INSTR) +#if defined(ARCH_X86_64) +# if defined(HAVE_POPCOUNT_INSTR) // Single-instruction builtin. - return (u32)__builtin_popcountll(x); -#elif defined(ARCH_X86_64) - // Fast branch-free version from bit-twiddling hacks as most Intel + return (u32)_mm_popcnt_u64(x); +# else + // Fast branch-free version from bit-twiddling hacks as older Intel // processors do not have a POPCNT instruction. x -= (x >> 1) & 0x5555555555555555; x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333); x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f; return (x * 0x0101010101010101) >> 56; +# endif #else // Synthesise from two 32-bit cases. 
return popcount32(x >> 32) + popcount32(x); From b5be8ef6aa59f9cd038d89a90169aca75e08dc4a Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 7 Apr 2016 13:17:55 +1000 Subject: [PATCH 207/218] Win32 ctz and clz --- src/util/bitutils.h | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/util/bitutils.h b/src/util/bitutils.h index 979a2c045..c863fba9b 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -86,10 +86,20 @@ u32 clz32(u32 x) { static really_inline u32 clz64(u64a x) { assert(x); // behaviour not defined for x == 0 -#if defined(_WIN32) +#if defined(_WIN64) unsigned long r; _BitScanReverse64(&r, x); return 63 - r; +#elif defined(_WIN32) + unsigned long x1 = (u32)x; + unsigned long x2 = (u32)(x >> 32); + unsigned long r; + if (x2) { + _BitScanReverse(&r, x2); + return (u32)(31 - r); + } + _BitScanReverse(&r, (u32)x1); + return (u32)(63 - r); #else return (u32)__builtin_clzll(x); #endif @@ -111,10 +121,17 @@ u32 ctz32(u32 x) { static really_inline u32 ctz64(u64a x) { assert(x); // behaviour not defined for x == 0 -#if defined(_WIN32) +#if defined(_WIN64) unsigned long r; _BitScanForward64(&r, x); return r; +#elif defined(_WIN32) + unsigned long r; + if (_BitScanForward(&r, (u32)x)) { + return (u32)r; + } + _BitScanForward(&r, x >> 32); + return (u32)(r + 32); #else return (u32)__builtin_ctzll(x); #endif @@ -177,8 +194,8 @@ u32 findAndClearLSB_64(u64a *v) { #else // fall back to doing things with two 32-bit cases, since gcc-4.1 doesn't // inline calls to __builtin_ctzll - u32 v1 = *v; - u32 v2 = (*v >> 32); + u32 v1 = (u32)*v; + u32 v2 = (u32)(*v >> 32); u32 offset; if (v1) { offset = findAndClearLSB_32(&v1); @@ -233,7 +250,7 @@ u32 findAndClearMSB_64(u64a *v) { #else // fall back to doing things with two 32-bit cases, since gcc-4.1 doesn't // inline calls to __builtin_ctzll - u32 v1 = *v; + u32 v1 = (u32)*v; u32 v2 = (*v >> 32); u32 offset; if (v2) { From 3527a9cb5cd844cda4a050604311fe3c22d35481 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 12 May 2016 08:51:02 +1000 Subject: [PATCH 208/218] masked move: C linkage --- src/util/masked_move.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/util/masked_move.h b/src/util/masked_move.h index c7bf04daa..93c79e758 100644 --- a/src/util/masked_move.h +++ b/src/util/masked_move.h @@ -35,8 +35,14 @@ #include "simd_utils.h" #include "simd_utils_ssse3.h" +#ifdef __cplusplus +extern "C" { +#endif extern const u32 mm_mask_mask[16]; extern const u32 mm_shuffle_end[32][8]; +#ifdef __cplusplus +} +#endif /* load mask for len bytes from start of buffer */ static really_inline m256 From 1d6462532a3cb61f14cedadf7cd2c90b88f58cce Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 12 May 2016 08:39:12 +1000 Subject: [PATCH 209/218] vbs: the static array has C linkage --- src/util/simd_utils_ssse3.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/util/simd_utils_ssse3.h b/src/util/simd_utils_ssse3.h index 268bf4229..8ab896218 100644 --- a/src/util/simd_utils_ssse3.h +++ b/src/util/simd_utils_ssse3.h @@ -77,7 +77,13 @@ m128 pshufb(m128 a, m128 b) { return result; } +#ifdef __cplusplus +extern "C" { +#endif extern const char vbs_mask_data[]; +#ifdef __cplusplus +} +#endif static really_inline m128 
variable_byte_shift_m128(m128 in, s32 amount) { From d36cc9ea4c3ff748f5f7f89a611abf1a99ab09f9 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 9 May 2016 10:53:54 +1000 Subject: [PATCH 210/218] debug printf: Windows path separator --- src/ue2common.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/ue2common.h b/src/ue2common.h index b279f81a5..2de607532 100644 --- a/src/ue2common.h +++ b/src/ue2common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -194,12 +194,20 @@ typedef u32 ReportID; #define unlikely(x) (x) #endif +#if !defined(RELEASE_BUILD) || defined(DEBUG) +#ifdef _WIN32 +#define PATH_SEP '\\' +#else +#define PATH_SEP '/' +#endif +#endif + #if defined(DEBUG) && !defined(DEBUG_PRINTF) #include #include #define DEBUG_PRINTF(format, ...) printf("%s:%s:%d:" format, \ - strrchr(__FILE__, '/') + 1, __func__, \ - __LINE__, ## __VA_ARGS__) + strrchr(__FILE__, PATH_SEP) + 1, \ + __func__, __LINE__, ## __VA_ARGS__) #elif !defined(DEBUG_PRINTF) #define DEBUG_PRINTF(format, ...) do { } while(0) #endif @@ -208,8 +216,8 @@ typedef u32 ReportID; #include #include #define ADEBUG_PRINTF(format, ...) printf("!%s:%s:%d:" format, \ - strrchr(__FILE__, '/') + 1, __func__, \ - __LINE__, ## __VA_ARGS__) + strrchr(__FILE__, PATH_SEP) + 1, \ + __func__, __LINE__, ## __VA_ARGS__) #else #define ADEBUG_PRINTF(format, ...) do { } while(0) #endif From ca2343f98ea3e4b2373bd8dd7c34db3fde36c57b Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 9 May 2016 10:47:15 +1000 Subject: [PATCH 211/218] We need an immediate so use intrinsic directly --- src/util/simd_utils_ssse3.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/util/simd_utils_ssse3.h b/src/util/simd_utils_ssse3.h index 8ab896218..6854ade35 100644 --- a/src/util/simd_utils_ssse3.h +++ b/src/util/simd_utils_ssse3.h @@ -98,16 +98,14 @@ static really_inline m256 vpshufb(m256 a, m256 b) { return _mm256_shuffle_epi8(a, b); } + #if defined(USE_GCC_COMPOUND_STATEMENTS) #define vpalignr(r, l, offset) ({ \ m256 res = _mm256_alignr_epi8(r, l, offset); \ res; \ }) #else -static really_inline -m256 vpalignr(m256 r, m256 l, const int offset) { - return _mm256_alignr_epi8(r, l, offset); -} +#define vpalignr(r, l, offset) _mm256_alignr_epi8(r, l, offset) #endif #else // not __AVX2__ From 89bc2b4b39e38d8c1fd240605d414838d899f19b Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 23 May 2016 14:57:48 +1000 Subject: [PATCH 212/218] cmake: invoke the compiler to test arch features We require SSSE3, and optionally support AVX2, and the best way of testing the compiler and compile flags is to run the compiler. 
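For reference, the AVX2 probe that cmake/arch.cmake hands to CHECK_C_SOURCE_COMPILES is just an ordinary translation unit; a roughly equivalent standalone version (illustrative only, with x86intrin.h standing in for the detected intrinsics header) looks like:

    /* Probe sketch: this compiles only when the compiler and flags
     * really provide AVX2; otherwise the #error fires and the cmake
     * check records HAVE_AVX2 as false. */
    #include <x86intrin.h>
    #if !defined(__AVX2__)
    #error no avx2
    #endif

    int main(void) {
        __m256i z = _mm256_setzero_si256();
        (void)_mm256_xor_si256(z, z);
        return 0;
    }

If the probe fails to compile, the build simply proceeds without the AVX2-only sources (teddy_avx2.c, masked_move.c).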
--- CMakeLists.txt | 13 +++++++++++-- cmake/arch.cmake | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 cmake/arch.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 7439f50b8..fe486eedd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -237,6 +237,9 @@ if (RELEASE_BUILD) endif() endif() +# ensure we are building for the right target arch +include (${CMAKE_MODULE_PATH}/arch.cmake) + # testing a builtin takes a little more work CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED) @@ -403,7 +406,6 @@ set (hs_exec_SRCS src/fdr/flood_runtime.h src/fdr/fdr_loadval.h src/fdr/teddy.c - src/fdr/teddy_avx2.c src/fdr/teddy.h src/fdr/teddy_internal.h src/fdr/teddy_runtime_common.h @@ -513,7 +515,6 @@ set (hs_exec_SRCS src/util/fatbit.h src/util/fatbit.c src/util/join.h - src/util/masked_move.c src/util/masked_move.h src/util/multibit.h src/util/multibit_internal.h @@ -540,6 +541,14 @@ set (hs_exec_SRCS src/database.h ) +if (HAVE_AVX2) + set (hs_exec_SRCS + ${hs_exec_SRCS} + src/fdr/teddy_avx2.c + src/util/masked_move.c + ) +endif () + SET (hs_SRCS ${hs_HEADERS} diff --git a/cmake/arch.cmake b/cmake/arch.cmake new file mode 100644 index 000000000..c00401dd0 --- /dev/null +++ b/cmake/arch.cmake @@ -0,0 +1,42 @@ +# detect architecture features +# +# must be called after determining where compiler intrinsics are defined + +if (HAVE_C_X86INTRIN_H) + set (INTRIN_INC_H "x86intrin.h") +elseif (HAVE_C_INTRIN_H) + set (INTRIN_INC_H "intrin.h") +else () + message (FATAL_ERROR "No intrinsics header found") +endif () + + +set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic +CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> +int main() { + __m128i a = _mm_set1_epi8(1); + (void)_mm_shuffle_epi8(a, a); +}" HAVE_SSSE3) + +if (NOT HAVE_SSSE3) + message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") +endif () + +# now look for AVX2 +CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> +#if !defined(__AVX2__) +#error no avx2 +#endif + +int main(){ + __m256i z = _mm256_setzero_si256(); + (void)_mm256_xor_si256(z, z); +}" HAVE_AVX2) + +if (NOT HAVE_AVX2) + message(STATUS "Building without AVX2 support") +endif () + +unset (CMAKE_REQUIRED_FLAGS) +unset (INTRIN_INC_H) From 9826522e34dcd2cfe584083cd67ad32307e18460 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 27 May 2016 10:40:06 +1000 Subject: [PATCH 213/218] rose: fix CHECK_NOT_HANDLED placement bug The CHECK_NOT_HANDLED instruction was being inserted into an already partially-flattened program, which would result in jump offsets becoming incorrect. This change places it as part of the normal flow of program construction, which avoids this issue. 
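To make the hazard concrete, here is a hypothetical sketch (not Hyperscan's actual program representation; the struct and instruction names below are illustrative) of why late insertion into a flattened program goes wrong: conditional instructions hold relative jump offsets, so splicing a new instruction between a branch and its target silently leaves the stored offset pointing at the wrong instruction.

    /* Hypothetical illustration: "prog" stands in for a flattened role
     * program; "jump" is a relative offset to the fail target. */
    #include <stdio.h>

    struct instr {
        const char *name;
        int jump; /* relative offset to fail target; 0 = none */
    };

    int main(void) {
        struct instr prog[] = {
            { "CHECK_BOUNDS", 3 },  /* on failure, jump to END */
            { "DO_WORK", 0 },
            { "REPORT", 0 },
            { "END", 0 },
        };
        /* Inserting CHECK_NOT_HANDLED at index 1 at this point would
         * shift END to index 4, but prog[0].jump still says 3, so the
         * failed bounds check would land on REPORT instead. Emitting
         * the instruction during normal program construction avoids
         * any such offset fix-up. */
        printf("fail target: %s\n", prog[0 + prog[0].jump].name);
        return 0;
    }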
--- src/rose/rose_build_bytecode.cpp | 70 ++++++++++++-------------------- 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 30db15ffb..1b00b4b5a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3019,6 +3019,25 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, program.insert(it, ri); } +static +void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, + vector &program) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED, + JumpTarget::NEXT_BLOCK); + + u32 handled_key; + if (contains(bc.handledKeys, v)) { + handled_key = bc.handledKeys.at(v); + } else { + handled_key = verify_u32(bc.handledKeys.size()); + bc.handledKeys.emplace(v, handled_key); + } + + ri.u.checkNotHandled.key = handled_key; + + program.push_back(move(ri)); +} + static vector makeProgram(RoseBuildImpl &build, build_context &bc, const RoseEdge &e) { @@ -3042,6 +3061,13 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, makeRoleCheckBounds(build, v, e, program); } + // This program may be triggered by different predecessors, with different + // offset bounds. We must ensure we put this check/set operation after the + // bounds check to deal with this case. + if (hasGreaterInDegree(1, v, g)) { + makeRoleCheckNotHandled(bc, v, program); + } + makeRoleLookaround(build, bc, v, program); makeRoleCheckLeftfix(build, bc, v, program); @@ -3228,48 +3254,6 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, *laggedRoseCount = lagIndex; } -static -void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, - vector &program) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED, - JumpTarget::NEXT_BLOCK); - - u32 handled_key; - if (contains(bc.handledKeys, v)) { - handled_key = bc.handledKeys.at(v); - } else { - handled_key = verify_u32(bc.handledKeys.size()); - bc.handledKeys.emplace(v, handled_key); - } - - ri.u.checkNotHandled.key = handled_key; - - // This program may be triggered by different predecessors, with different - // offset bounds. We must ensure we put this check/set operation after the - // bounds check to deal with this case. - auto it = - find_if(begin(program), end(program), [](const RoseInstruction &ri) { - return ri.code() > ROSE_INSTR_CHECK_BOUNDS; - }); - program.insert(it, ri); -} - -static -vector makePredProgram(RoseBuildImpl &build, build_context &bc, - const RoseEdge &e) { - const RoseGraph &g = build.g; - const RoseVertex v = target(e, g); - - auto program = makeProgram(build, bc, e); - - if (hasGreaterInDegree(1, v, g)) { - // Only necessary when there is more than one pred. 
- makeRoleCheckNotHandled(bc, v, program); - } - - return program; -} - static u32 addPredBlocksSingle( map>> &predProgramLists, @@ -3642,7 +3626,7 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, g[target(e, g)].idx); assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); - auto program = makePredProgram(build, bc, e); + auto program = makeProgram(build, bc, e); predProgramLists[pred_state].push_back(program); } From 614ca0accf3c825f0df0a8e532db892b68764442 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 27 May 2016 11:11:04 +1000 Subject: [PATCH 214/218] rose: always push CHECK_BOUNDS onto end of program --- src/rose/rose_build_bytecode.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 1b00b4b5a..758dd442a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3010,13 +3010,7 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, ri.u.checkBounds.min_bound = min_bound; ri.u.checkBounds.max_bound = max_bound; - // This precondition instruction should go near the start of - // the program, after the ONLY_EOD check if it's present. - auto it = - find_if(begin(program), end(program), [](const RoseInstruction &ri) { - return ri.code() > ROSE_INSTR_CHECK_ONLY_EOD; - }); - program.insert(it, ri); + program.push_back(move(ri)); } static From 428bb9a110d7a0815e9ecc6c3bb78234b062af7b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 30 May 2016 14:30:43 +1000 Subject: [PATCH 215/218] doc: correct syntax for "extended" option setting Extended syntax (ignore whitespace) is controlled with (?x) and (?-x). --- doc/dev-reference/compilation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index f3723dc9d..21254b22d 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -124,7 +124,7 @@ The following regex constructs are supported by Hyperscan: * Case-sensitivity: :regexp:`(?i)` and :regexp:`(?-i)` * Multi-line: :regexp:`(?m)` and :regexp:`(?-m)` * Dot-all: :regexp:`(?s)` and :regexp:`(?-s)` - * Extended syntax: :regexp:`(?s)` and :regexp:`(?-s)` + * Extended syntax: :regexp:`(?x)` and :regexp:`(?-x)` * The :regexp:`\\b` and :regexp:`\\B` zero-width assertions (word boundary and 'not word boundary', respectively). From d7466ff84056b03a37c79493f12ee6e8edb1559b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 30 May 2016 14:51:04 +1000 Subject: [PATCH 216/218] docs: more detail on option setting --- doc/dev-reference/compilation.rst | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index 21254b22d..8f44c15c1 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -119,12 +119,21 @@ The following regex constructs are supported by Hyperscan: * The anchors :regexp:`^`, :regexp:`$`, :regexp:`\\A`, :regexp:`\\Z` and :regexp:`\\z`. -* Option modifiers for: +* Option modifiers: - * Case-sensitivity: :regexp:`(?i)` and :regexp:`(?-i)` - * Multi-line: :regexp:`(?m)` and :regexp:`(?-m)` - * Dot-all: :regexp:`(?s)` and :regexp:`(?-s)` - * Extended syntax: :regexp:`(?x)` and :regexp:`(?-x)` + These allow behaviour to be switched on (with :regexp:`(?