Skip to content

Commit

Permalink
Merge branch 'github_develop' into github_master
Browse files Browse the repository at this point in the history
  • Loading branch information
hongyang7 committed May 25, 2020
2 parents d79973e + c758cdf commit c00683d
Show file tree
Hide file tree
Showing 31 changed files with 1,453 additions and 114 deletions.
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,29 @@

This is a list of notable changes to Hyperscan, in reverse chronological order.

## [5.3.0] 2020-05-15
- Improvement on literal matcher "Teddy" performance, including support for
Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R) AVX-512
VBMI).
- Improvement on single-byte/two-byte matching performance, including support
for Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512).
- hsbench: add hyphen support for -T option.
- tools/fuzz: add test scripts for synthetic pattern generation.
- Bugfix for acceleration path analysis in LimEx NFA.
- Bugfix for duplicate matches for Small-write engine.
- Bugfix for UTF8 checking problem for hscollider.
- Bugfix for issue #205: avoid crash of `hs_compile_lit_multi()` with clang and
ASAN.
- Bugfix for issue #211: fix error in `db_check_platform()` function.
- Bugfix for issue #217: fix cmake parsing issue of CPU arch for non-English
locale.
- Bugfix for issue #228: avoid undefined behavior when calling `close()` after
`fdopendir()` in `loadExpressions()`.
- Bugfix for issue #239: fix hyperscan compile issue under gcc-10.
- Add VLAN packets processing capability in pcap analysis script. (#214)
- Avoid extra convert instruction for "Noodle". (#221)
- Add Hyperscan version macro in `hs.h`. (#222)

## [5.2.1] 2019-10-13
- Bugfix for issue #186: fix compile issue when `BUILD_SHARED_LIBS` is on in
release mode.
Expand Down
22 changes: 14 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
project (hyperscan C CXX)

set (HS_MAJOR_VERSION 5)
set (HS_MINOR_VERSION 2)
set (HS_PATCH_VERSION 1)
set (HS_MINOR_VERSION 3)
set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})

set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
Expand Down Expand Up @@ -187,9 +187,9 @@ else()
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "Known" POS)
string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT)
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1"
string(FIND "${_GCC_OUTPUT}" "march" POS)
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1"
GNUCC_ARCH "${_GCC_OUTPUT}")

# test the parsed flag
Expand Down Expand Up @@ -326,7 +326,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
set (FAT_RUNTIME_REQUISITES TRUE)
endif()
endif()
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
endif ()

include (${CMAKE_MODULE_PATH}/arch.cmake)
Expand All @@ -340,7 +340,7 @@ if (NOT WIN32)
set(C_FLAGS_TO_CHECK
# Variable length arrays are way bad, most especially at run time
"-Wvla"
# Pointer arith on void pointers is doing it wong.
# Pointer arith on void pointers is doing it wrong.
"-Wpointer-arith"
# Build our C code with -Wstrict-prototypes -Wmissing-prototypes
"-Wstrict-prototypes"
Expand Down Expand Up @@ -383,7 +383,7 @@ if (CC_PAREN_EQUALITY)
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-parentheses-equality")
endif()

# clang compains about unused const vars in our Ragel-generated code.
# clang complains about unused const vars in our Ragel-generated code.
CHECK_CXX_COMPILER_FLAG("-Wunused-const-variable" CXX_UNUSED_CONST_VAR)
if (CXX_UNUSED_CONST_VAR)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
Expand Down Expand Up @@ -418,6 +418,12 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS)
# gcc5 complains about this
CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)

# gcc 10 complains about this
CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW)
if(CC_STRINGOP_OVERFLOW)
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow")
endif()

endif()

include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
Expand Down
12 changes: 12 additions & 0 deletions cmake/arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ int main(){
(void)_mm512_abs_epi8(z);
}" HAVE_AVX512)

# and now for AVX512VBMI
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
#if !defined(__AVX512VBMI__)
#error no avx512vbmi
#endif
int main(){
__m512i a = _mm512_set1_epi8(0xFF);
__m512i idx = _mm512_set_epi64(3ULL, 2ULL, 1ULL, 0ULL, 7ULL, 6ULL, 5ULL, 4ULL);
(void)_mm512_permutexvar_epi8(idx, a);
}" HAVE_AVX512VBMI)

if (FAT_RUNTIME)
if (NOT HAVE_SSSE3)
message(FATAL_ERROR "SSSE3 support required to build fat runtime")
Expand Down
12 changes: 6 additions & 6 deletions doc/dev-reference/compilation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Hyperscan provides support for targeting a database at a particular CPU
platform; see :ref:`instr_specialization` for details.

=====================
Compile Pure Literals
Compile Pure Literals
=====================

Pure literal is a special case of regular expression. A character sequence is
Expand All @@ -75,12 +75,12 @@ characters exist in regular grammer like ``[``, ``]``, ``(``, ``)``, ``{``,
In the pure literal case, however, all these meta characters lose their special
meanings and are treated as ordinary ASCII characters.

Hyperscan is initially designed to process common regualr expressions. It is
Hyperscan is initially designed to process common regular expressions. It is
hence embedded with a complex parser to do comprehensive regular grammar
interpretation. Particularly, the identification of above meta characters is the
basic step for the interpretation of far more complex regular grammars.

However in real cases, patterns may not always be regualr expressions. They
However in real cases, patterns may not always be regular expressions. They
could just be pure literals. Problem will come if the pure literals contain
regular meta characters. Supposing fed directly into traditional Hyperscan
compile API, all these meta characters will be interpreted in predefined ways,
Expand All @@ -98,7 +98,7 @@ In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns
#. :c:func:`hs_compile_lit_multi`: compiles an array of pure literals into a
pattern database. All of the supplied patterns will be scanned for
concurrently at scan time, with user-supplied identifiers returned when they
match.
match.

These 2 APIs are designed for use cases where all patterns contained in the
target rule set are pure literals. Users can pass the initial pure literal
Expand All @@ -110,8 +110,8 @@ Hyperscan needs to locate the end position of the input expression via clearly
knowing each literal's length, not by simply identifying character ``\0`` of a
string.

Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_MULTILINE`,
:c:member:`HS_FLAG_SINGLEMATCH`, :c:member:`HS_FLAG_SOM_LEFTMOST`.
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
:c:member:`HS_FLAG_SOM_LEFTMOST`.

.. note:: We don't support literal compilation API with :ref:`extparam`. And
for runtime implementation, traditional runtime APIs can still be
Expand Down
2 changes: 1 addition & 1 deletion doc/dev-reference/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ instead of potentially executing illegal instructions. The API function
:c:func:`hs_valid_platform` can be used by application writers to determine if
the current platform is supported by Hyperscan.

At of this release, the variants of the runtime that are built, and the CPU
As of this release, the variants of the runtime that are built, and the CPU
capability that is required, are the following:

+----------+-------------------------------+---------------------------+
Expand Down
13 changes: 6 additions & 7 deletions src/compiler/compiler.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2019, Intel Corporation
* Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -125,7 +125,7 @@ ParsedLitExpression::ParsedLitExpression(unsigned index_in,
: expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
// For pure literal expression, below 'HS_FLAG_'s are unuseful:
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET/MULTILINE

if (flags & ~HS_FLAG_ALL) {
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
Expand Down Expand Up @@ -402,19 +402,18 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
}

// Ensure that our pattern isn't too long (in characters).
if (strlen(expression) > cc.grey.limitPatternLength) {
if (expLength > cc.grey.limitPatternLength) {
throw CompileError("Pattern length exceeds limit.");
}

// filter out flags not supported by pure literal API.
u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
HS_FLAG_QUIET;
HS_FLAG_QUIET | HS_FLAG_MULTILINE;

if (flags & not_supported) {
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
"HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
"supported in literal API.");
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_SINGLEMATCH and "
"HS_FLAG_SOM_LEFTMOST are supported in literal API.");
}

// This expression must be a pure literal, we can build ue2_literal
Expand Down
4 changes: 2 additions & 2 deletions src/database.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
static
hs_error_t db_check_platform(const u64a p) {
if (p != hs_current_platform
&& p != hs_current_platform_no_avx2
&& p != hs_current_platform_no_avx512) {
&& p != (hs_current_platform | hs_current_platform_no_avx2)
&& p != (hs_current_platform | hs_current_platform_no_avx512)) {
return HS_DB_PLATFORM_ERROR;
}
// passed all checks
Expand Down
Loading

0 comments on commit c00683d

Please sign in to comment.