diff --git a/Translit.xcodeproj/project.pbxproj b/Translit.xcodeproj/project.pbxproj index f812650..b38e415 100644 --- a/Translit.xcodeproj/project.pbxproj +++ b/Translit.xcodeproj/project.pbxproj @@ -39,9 +39,9 @@ 445D1E5F2AFE35AA00FA1C07 /* InputController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 445D1E5E2AFE35AA00FA1C07 /* InputController.mm */; }; 445D1E622AFE364900FA1C07 /* Transliterator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 445D1E602AFE364900FA1C07 /* Transliterator.cpp */; }; 445D1E662AFFA1F700FA1C07 /* InputMethodKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 445D1E652AFFA1F700FA1C07 /* InputMethodKit.framework */; }; - 4475AD322B11F97F008DA122 /* TestStateMachine.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4475AD312B11F97F008DA122 /* TestStateMachine.mm */; }; - 44C675812B021410003A5BDE /* TransliteratorRegistry.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 44C6757F2B021410003A5BDE /* TransliteratorRegistry.cpp */; }; - 44C675822B021410003A5BDE /* TransliteratorRegistry.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 44C6757F2B021410003A5BDE /* TransliteratorRegistry.cpp */; }; + 4475AD322B11F97F008DA122 /* TestPrefixMapper.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4475AD312B11F97F008DA122 /* TestPrefixMapper.mm */; }; + 44B947CF2B4698E500B68C7E /* TestPerf.mm in Sources */ = {isa = PBXBuildFile; fileRef = 44B947CE2B4698E500B68C7E /* TestPerf.mm */; }; + 44B947D12B469B4C00B68C7E /* TestMapper.mm in Sources */ = {isa = PBXBuildFile; fileRef = 44B947D02B469B4C00B68C7E /* TestMapper.mm */; }; 44C675842B02153F003A5BDE /* TestRu.mm in Sources */ = {isa = PBXBuildFile; fileRef = 44C675832B02153F003A5BDE /* TestRu.mm */; }; 44C675922B0219E9003A5BDE /* unicode_mappings.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 44C675902B0219DA003A5BDE /* unicode_mappings.cpp */; }; 44C675B32B021AE9003A5BDE /* libsys_string.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 44C675892B0218E1003A5BDE /* libsys_string.a */; }; @@ -128,13 +128,14 @@ 445D1E712AFFFAB800FA1C07 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 445D1E732AFFFAE600FA1C07 /* Base */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = Base; path = Base.lproj/InfoPlist.strings; sourceTree = ""; }; 446DBB8E2B00BD53000B76EC /* TranslitTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = TranslitTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; - 4475AD302B11EEE4008DA122 /* StateMachine.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = StateMachine.hpp; sourceTree = ""; }; - 4475AD312B11F97F008DA122 /* TestStateMachine.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = TestStateMachine.mm; sourceTree = ""; }; + 4475AD302B11EEE4008DA122 /* Mapper.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Mapper.hpp; sourceTree = ""; }; + 4475AD312B11F97F008DA122 /* TestPrefixMapper.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = TestPrefixMapper.mm; sourceTree = ""; }; 4475AD332B131F80008DA122 /* Main.xctestplan */ = {isa = PBXFileReference; lastKnownFileType = text; path = Main.xctestplan; sourceTree = ""; }; 4475AD342B132242008DA122 /* Perf.xctestplan */ = {isa = PBXFileReference; lastKnownFileType = text; path = Perf.xctestplan; sourceTree = ""; }; + 449738F62B44FA4300C7FAA9 /* MultiMatch.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = MultiMatch.hpp; sourceTree = ""; }; + 44B947CE2B4698E500B68C7E /* TestPerf.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = TestPerf.mm; sourceTree = ""; }; + 44B947D02B469B4C00B68C7E /* TestMapper.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = TestMapper.mm; sourceTree = ""; }; 44C6757E2B01B4FD003A5BDE /* TableRU.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = TableRU.hpp; sourceTree = ""; }; - 44C6757F2B021410003A5BDE /* TransliteratorRegistry.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = TransliteratorRegistry.cpp; sourceTree = ""; }; - 44C675802B021410003A5BDE /* TransliteratorRegistry.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = TransliteratorRegistry.hpp; sourceTree = ""; }; 44C675832B02153F003A5BDE /* TestRu.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = TestRu.mm; sourceTree = ""; }; 44C675892B0218E1003A5BDE /* libsys_string.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsys_string.a; sourceTree = BUILT_PRODUCTS_DIR; }; 44C675902B0219DA003A5BDE /* unicode_mappings.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = unicode_mappings.cpp; sourceTree = ""; }; @@ -337,11 +338,10 @@ 445D1E4B2AFE2C6C00FA1C07 /* AppDelegate.mm */, 44C675C62B034E17003A5BDE /* MenuProtocol.hpp */, 445D1E5E2AFE35AA00FA1C07 /* InputController.mm */, - 4475AD302B11EEE4008DA122 /* StateMachine.hpp */, + 4475AD302B11EEE4008DA122 /* Mapper.hpp */, + 449738F62B44FA4300C7FAA9 /* MultiMatch.hpp */, 445D1E602AFE364900FA1C07 /* Transliterator.cpp */, 445D1E612AFE364900FA1C07 /* Transliterator.hpp */, - 44C6757F2B021410003A5BDE /* TransliteratorRegistry.cpp */, - 44C675802B021410003A5BDE /* TransliteratorRegistry.hpp */, 44C6757E2B01B4FD003A5BDE /* TableRU.hpp */, 44C675B92B02635A003A5BDE /* TableHE.hpp */, 442D25802B044EE000204800 /* MappingsWindowController.hpp */, @@ -356,8 +356,10 @@ 446DBB8F2B00BD53000B76EC /* tests */ = { isa = PBXGroup; children = ( - 4475AD312B11F97F008DA122 /* TestStateMachine.mm */, + 4475AD312B11F97F008DA122 /* TestPrefixMapper.mm */, + 44B947D02B469B4C00B68C7E /* TestMapper.mm */, 44C675832B02153F003A5BDE /* TestRu.mm */, + 44B947CE2B4698E500B68C7E /* TestPerf.mm */, 442D25BD2B0B2D2C00204800 /* TestCommon.hpp */, 442D25BE2B0B2D6600204800 /* TestCommon.mm */, 4475AD332B131F80008DA122 /* Main.xctestplan */, @@ -683,7 +685,6 @@ files = ( 442D25CC2B0CC97100204800 /* Uninstall.mm in Sources */, 442D25AC2B08339F00204800 /* AboutWindowController.mm in Sources */, - 44C675812B021410003A5BDE /* TransliteratorRegistry.cpp in Sources */, 445D1E532AFE2C6F00FA1C07 /* main.mm in Sources */, 442D25822B044EE000204800 /* MappingsWindowController.mm in Sources */, 445D1E622AFE364900FA1C07 /* Transliterator.cpp in Sources */, @@ -696,9 +697,10 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 44C675822B021410003A5BDE /* TransliteratorRegistry.cpp in Sources */, - 4475AD322B11F97F008DA122 /* TestStateMachine.mm in Sources */, + 4475AD322B11F97F008DA122 /* TestPrefixMapper.mm in Sources */, 442D25B42B0A2E1B00204800 /* Transliterator.cpp in Sources */, + 44B947D12B469B4C00B68C7E /* TestMapper.mm in Sources */, + 44B947CF2B4698E500B68C7E /* TestPerf.mm in Sources */, 44C675842B02153F003A5BDE /* TestRu.mm in Sources */, 442D25BF2B0B2D6600204800 /* TestCommon.mm in Sources */, ); diff --git a/Translit/src/InputController.mm b/Translit/src/InputController.mm index eec58c8..f6356cc 100644 --- a/Translit/src/InputController.mm +++ b/Translit/src/InputController.mm @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "Transliterator.hpp" -#include "TransliteratorRegistry.hpp" #include "AppDelegate.hpp" #include "MenuProtocol.hpp" #include "MappingsWindowController.hpp" @@ -14,7 +13,7 @@ @interface InputController : IMKInputController @interface InputController() { - Transliterator * _transliterator; + std::unique_ptr _transliterator; MappingsWindowController * _mappingsController; NSString * _currentLanguage; } @@ -29,8 +28,7 @@ -(id) initWithServer:(IMKServer*)server delegate:(id)delegate client:(idclear(); + _transliterator = std::make_unique(_currentLanguage); } return self; @@ -98,8 +96,7 @@ -(void) setValue:(id)value forTag:(long)tag client:(id)sender sys_string prefix = sys_string(NSBundle.mainBundle.bundleIdentifier) + S("."); _currentLanguage = val.remove_prefix(prefix).ns_str(); os_log_info(OS_LOG_DEFAULT, "Setting language to %{public}@", _currentLanguage); - _transliterator = &getTransliterator(_currentLanguage); - _transliterator->clear(); + _transliterator = std::make_unique(_currentLanguage); if (_mappingsController) _mappingsController.language = _currentLanguage; } diff --git a/Translit/src/Mapper.hpp b/Translit/src/Mapper.hpp new file mode 100644 index 0000000..26c1043 --- /dev/null +++ b/Translit/src/Mapper.hpp @@ -0,0 +1,101 @@ +// Copyright (c) 2023, Eugene Gershnik +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef TRANSLIT_HEADER_MAPPER_HPP_INCLUDED +#define TRANSLIT_HEADER_MAPPER_HPP_INCLUDED + +#include "MultiMatch.hpp" + +template +struct Mapping { + const T dst; + const CTString src; + + constexpr Mapping(T c, const Char (&arr)[N + 1]) noexcept: + dst(c), + src(arr) + {} +}; + +template +Mapping(T c, const Char (&arr)[N]) -> Mapping; + +template +struct Value { + const T value; + + constexpr Value(T v) noexcept: + value(v) + {} +}; + + +template +struct PrefixMappingResult { + /** + End of match + If !payload always stays at the start of input + */ + It next; + /** The mapping of the match, if successful */ + std::optional payload; + /** Whether the answer is definite and won't change with larger input */ + bool definite; +}; + +template +constexpr auto nullPrefixMapper(const Range & range) { + return PrefixMappingResult>{std::ranges::begin(range), std::nullopt, true}; +} + +template +requires(SameCharType && + (std::is_same_v && ...) && + std::is_same_v, CharTypeOf>) +constexpr auto makePrefixMapper() { + + using Payload = std::remove_const_t; + using Char = CharTypeOf; + + auto func = [](const Range & range) { + using Iterator = std::ranges::iterator_t; + + static constexpr auto multiMatch = makeMultiMatch(); + static constexpr Payload mappings[1 + sizeof...(Rest)] = {First.dst, Rest.dst...}; + + auto res = prefixMatch(multiMatch, range); + if (res.index != multiMatch.noMatch) + return PrefixMappingResult{res.next, mappings[res.index], res.definite}; + return PrefixMappingResult{res.next, std::nullopt, res.definite}; + }; + + return func; +} + +template +requires(SameCharType && + std::is_same_v && + (std::is_same_v && ...) && + std::is_same_v, CharTypeOf>) +constexpr auto makeMapper() { + + using Payload = std::remove_const_t; + using Char = CharTypeOf; + + auto func = [](const Range & range) { + using Iterator = std::ranges::iterator_t; + + static constexpr auto multiMatch = makeMultiMatch(); + static constexpr Payload mappings[2 + sizeof...(Rest)] = {First.dst, Rest.dst..., Default.value}; + + auto res = match(multiMatch, range); + return mappings[res]; + }; + + return func; +} + + +#endif + + diff --git a/Translit/src/MultiMatch.hpp b/Translit/src/MultiMatch.hpp new file mode 100644 index 0000000..a8cb8a7 --- /dev/null +++ b/Translit/src/MultiMatch.hpp @@ -0,0 +1,393 @@ +// Copyright (c) 2023, Eugene Gershnik +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef TRANSLIT_HEADER_MULTI_MATCH_HPP_INCLUDED +#define TRANSLIT_HEADER_MULTI_MATCH_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include + +template +struct CTString { + using char_type = Char; + + Char chars[N + 1]; + + constexpr CTString(const Char (&src)[N + 1]) noexcept + { + std::copy(src, src + N + 1, chars); + } + + constexpr auto size() const -> size_t { return N; } + + constexpr auto operator[](size_t i) const -> Char { return chars[i]; } + + friend constexpr bool operator==(const CTString & lhs, const CTString & rhs) { + return std::equal(lhs.chars, lhs.chars + N, rhs.chars); + } + + constexpr auto begin() const { return chars; } + constexpr auto end() const { return chars + N; } +}; + +template +CTString(const Char (&src)[N]) -> CTString; + + +template +constexpr bool SameCharType = (std::is_same_v && ...); + +template +using CharTypeOf = decltype(First)::char_type; + +template +class StaticVector { +private: + using Array = std::array; +public: + using iterator = Array::iterator; + using const_iterator = Array::const_iterator; + + constexpr auto begin() const { return m_buf.begin(); } + constexpr auto begin() { return m_buf.begin(); } + constexpr auto end() const { return m_buf.begin() + m_size; } + constexpr auto end() { return m_buf.begin() + m_size; } + constexpr auto size() const { return m_size; } + constexpr auto operator[](size_t i) -> T & { return m_buf[i]; } + constexpr auto operator[](size_t i) const -> const T & { return m_buf[i]; } + + constexpr auto insert(const_iterator where, const T & val) { + auto p = end(); + for(; p != where; --p) { + *p = *(p - 1); + } + *p = val; + ++m_size; + return p; + } + constexpr void push_back(const T & val) { + m_buf[m_size++] = val; + } + +private: + size_t m_size = 0; + Array m_buf{}; +}; + +namespace Impl { + + template + struct Inventory { + struct State { + static constexpr size_t notPresent = size_t(-1); + + std::basic_string_view str; + size_t index = notPresent; + size_t payloadIdx = notPresent; + bool successful = false; + bool final = false; + + constexpr friend auto operator<(const State & lhs, const State & rhs) { + return lhs.str < rhs.str; + } + }; + + StaticVector inputs; + StaticVector states; + size_t outcomeCount = 0; + }; + + template + requires(SameCharType) + consteval auto makeInventory() { + + constexpr size_t maxSize = 1 + (First.size() + ... + Rest.size()); + + Inventory, maxSize> inventory; + using State = decltype(inventory)::State; + + inventory.states.push_back({.final = true}); + + constexpr std::basic_string_view> strings[] = + { {First.begin(), First.size()}, {Rest.begin(), Rest.size()}... }; + + for(size_t idx = 0; idx < std::size(strings); ++idx) { + + auto string = strings[idx]; + + for (auto c: string) { + auto it = std::lower_bound(inventory.inputs.begin(), inventory.inputs.end(), c); + if (it == inventory.inputs.end() || *it != c) + inventory.inputs.insert(it, c); + } + + for (size_t i = 1; i < string.size(); ++i) { + State value{.str = {string.begin(), string.begin() + i}, .index = State::notPresent, .payloadIdx = State::notPresent, .successful = false, .final = false}; + auto it = std::lower_bound(inventory.states.begin(), inventory.states.end(), value); + if (it == inventory.states.end() || it->str != value.str) { + it = inventory.states.insert(it, value); + } else { + it->final = false; + } + } + + State value{.str = {string.begin(), string.end()}, .index = State::notPresent, .payloadIdx = idx, .successful = true, .final = true}; + auto it = std::lower_bound(inventory.states.begin(), inventory.states.end(), value); + if (it == inventory.states.end() || it->str != value.str) { + it = inventory.states.insert(it, value); + } else { + it->successful = true; + it->payloadIdx = idx; + } + } + + inventory.outcomeCount = 0; + size_t intermediateCount = inventory.states.size(); + for(auto & state: inventory.states) { + if (state.successful) { + state.index = inventory.outcomeCount++; + } else { + state.index = --intermediateCount; + } + } + + return inventory; + } + + struct Sizes { + size_t inputs; + size_t states; + size_t outcomes; + size_t noMatch; + }; + + template + class Outcome { + public: + constexpr Outcome() noexcept = default; + + constexpr Outcome(SizeType value, bool final) noexcept : + m_value(value | (SizeType(final) << (sizeof(SizeType) * CHAR_BIT - 1))) + {} + + constexpr bool final() const noexcept + { return bool(m_value >> (sizeof(SizeType) * CHAR_BIT - 1)); } + constexpr SizeType value() const noexcept + { return m_value & ~(SizeType(1) << (sizeof(SizeType) * CHAR_BIT - 1)); } + + template + static constexpr bool isSufficientFor() { + return MaxValue <= ~(SizeType(1) << (sizeof(SizeType) * CHAR_BIT - 1)); + } + private: + SizeType m_value = 0; + }; +} + +template +requires(Sizes.outcomes > 0) +struct MultiMatch { + static constexpr size_t noMatch = Sizes.noMatch; + + using CharType = Char; + using SizeType = std::conditional_t::isSufficientFor(), unsigned char, + std::conditional_t::isSufficientFor(), unsigned short, + std::conditional_t::isSufficientFor(), unsigned int, + std::conditional_t::isSufficientFor(), unsigned long, + std::conditional_t::isSufficientFor(), unsigned long long, + void>>>>>; + static_assert(!std::is_same_v, "Number of states cannot fit in any supported type"); + + using OutcomeType = Impl::Outcome; + + static constexpr SizeType noState = SizeType(-1); + + + std::array inputs; + std::array outcomes; + SizeType startState; + std::array transitions; +}; + +template +requires(SameCharType) +consteval auto makeMultiMatch() { + + constexpr auto inventory = Impl::makeInventory(); + constexpr Impl::Sizes sizes{inventory.inputs.size(), inventory.states.size(), inventory.outcomeCount, 1 + sizeof...(Rest)}; + MultiMatch, sizes> ret{}; + + using SizeType = decltype(ret)::SizeType; + using OutcomeType = decltype(ret)::OutcomeType; + + std::copy(inventory.inputs.begin(), inventory.inputs.end(), ret.inputs.begin()); + for(auto & state: inventory.states) { + if (state.successful) { + ret.outcomes[state.index] = OutcomeType{SizeType(state.payloadIdx), state.final}; + } + } + ret.startState = inventory.states[0].index; + + std::fill(ret.transitions.begin(), ret.transitions.end(), ret.noState); + std::vector stateStack({0}); + for(size_t i = 1; i < inventory.states.size(); ++i) { + auto & state = inventory.states[i]; + auto newChar = state.str.back(); + + auto it = std::lower_bound(inventory.inputs.begin(), inventory.inputs.end(), newChar); + if (it == inventory.inputs.end() || *it != newChar) + throw std::logic_error("character not present"); + auto charIdx = it - inventory.inputs.begin(); + + for ( ; ; ) { + auto & prevState = inventory.states[stateStack.back()]; + if (state.str.size() == prevState.str.size() + 1 && state.str.substr(0, state.str.size() - 1) == prevState.str) { + ret.transitions[prevState.index * inventory.inputs.size() + charIdx] = state.index; + break; + } + stateStack.pop_back(); + } + stateStack.push_back(i); + } + + + return ret; +} + +template +struct PrefixMatchResult { + /** + End of match + If index == noMatch always stays at the start of input + */ + It next; + /** The index of the successful match if successful. noMatch otherwise */ + size_t index; + /** Whether the answer is definite and won't change with larger input */ + bool definite; +}; + +template +requires(std::is_same_v, typename Matcher::CharType>) +constexpr auto prefixMatch(const Matcher & matcher, Range && r) noexcept -> PrefixMatchResult> { + + using Result = PrefixMatchResult>; + + const auto first = std::ranges::begin(r); + const auto last = std::ranges::end(r); + + auto currentState = matcher.startState; + auto lastMatchedState = matcher.noState; + auto current = first; + auto consumed = first; + bool final = true; + for( ; ; ) { + + if (currentState < matcher.outcomes.size()) { + consumed = current; + lastMatchedState = currentState; + } + + if (current == last) { + final = false; + break; + } + + typename Matcher::CharType c = *current; + auto it = std::lower_bound(matcher.inputs.begin(), matcher.inputs.end(), c); + if (it == matcher.inputs.end() || *it != c) + break; + size_t inputIdx = it - matcher.inputs.begin(); + + auto nextState = matcher.transitions[currentState * matcher.inputs.size() + inputIdx]; + if (nextState == matcher.noState) + break; + + currentState = nextState; + ++current; + } + if (lastMatchedState != matcher.noState) { + auto & outcome = matcher.outcomes[lastMatchedState]; + return Result{consumed, outcome.value(), final || outcome.final()}; + } + return Result{first, Matcher::noMatch, final || matcher.inputs.size() == 0}; +} + +template +requires(std::is_same_v, typename Matcher::CharType>) +constexpr auto match(const Matcher & matcher, Range && r) noexcept -> size_t { + + auto currentState = matcher.startState; + for(typename Matcher::CharType c: r) { + auto it = std::lower_bound(matcher.inputs.begin(), matcher.inputs.end(), c); + if (it == matcher.inputs.end() || *it != c) + return Matcher::noMatch; + size_t inputIdx = it - matcher.inputs.begin(); + + auto nextState = matcher.transitions[currentState * matcher.inputs.size() + inputIdx]; + if (nextState == matcher.noState) + return Matcher::noMatch; + + currentState = nextState; + } + if (currentState < matcher.outcomes.size()) { + auto & outcome = matcher.outcomes[currentState]; + return outcome.value(); + } + return Matcher::noMatch; +} + +#ifndef NDEBUG + + #include + #include + + template + void debugPrint(const MultiMatch & val) { + std::cout << "chars: "; + for(auto c: val.inputs) { + std::cout << char(c); + } + std::cout << "\noutcomes: "; + for(auto outcome: val.outcomes) { + std::cout << size_t(outcome.value()); + if (!outcome.final()) + std::cout << "[i]"; + std::cout << " "; + } + std::cout << "\nstart state: " << size_t(val.startState); + std::cout << "\ntransitions:\n"; + size_t maxTrSize = 0; + for(auto tr: val.transitions) { + if (tr == decltype(tr)(-1)) + maxTrSize = std::max(maxTrSize, size_t(1)); + else + maxTrSize = std::max(maxTrSize, std::to_string(size_t(tr)).size()); + } + + std::ios oldState(nullptr); + oldState.copyfmt(std::cout); + std::cout << std::setfill(' '); + for(size_t y = 0; y < Sizes.states; ++y) { + for(size_t x = 0; x < Sizes.inputs; ++x) { + auto tr = val.transitions[y * Sizes.inputs + x]; + std::cout << std::setw(int(maxTrSize)); + if (tr == decltype(tr)(-1)) + std::cout << '*'; + else + std::cout << size_t(tr); + std::cout << ' '; + } + std::cout << '\n'; + } + std::cout << '\n'; + std::cout.copyfmt(oldState); + } + +#endif + +#endif diff --git a/Translit/src/StateMachine.hpp b/Translit/src/StateMachine.hpp deleted file mode 100644 index 2687d25..0000000 --- a/Translit/src/StateMachine.hpp +++ /dev/null @@ -1,292 +0,0 @@ -// Copyright (c) 2023, Eugene Gershnik -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef TRANSLIT_HEADER_STATE_MACHINE_HPP_INCLUDED -#define TRANSLIT_HEADER_STATE_MACHINE_HPP_INCLUDED - -#include -#include -#include -#include -#include -#include - - -template -class StateMachine { - -private: - static inline constexpr LengthType noTransition = LengthType(-1); - -private: - struct Outcome { - PayloadType payload; - bool successful: 1; - bool final: 1; - }; - enum class OutcomeType { - final, - nonfinal, - intermediate - }; - struct OutcomeDescriptor { - OutcomeDescriptor(LengthType idx_, OutcomeType type_, PayloadType payload_): - idx(idx_), type(type_), payload(payload_) - {} - LengthType idx; - OutcomeType type; - PayloadType payload; - }; - - struct Expanded { - std::vector inputs; - std::vector outcomes; - std::vector transitions; - }; - -public: - template - requires(std::is_convertible_v>, PayloadType> && - std::is_convertible_v>, const Char *>) - StateMachine(Range && range) { - - if (std::ranges::empty(range)) - return; - - Expanded expanded; - - //Step 1: Populate inputs and map of input sequences to outcomes and "empty outcome" - std::map, OutcomeDescriptor> outcomesMap; - size_t terminalCount = 1; - OutcomeDescriptor emptyOutcome{noTransition, OutcomeType::intermediate, PayloadType{}}; - - for(auto [dst, src]: range) { - - if (!*src) { - assert(emptyOutcome.type == OutcomeType::intermediate); - emptyOutcome.type = OutcomeType::final; - emptyOutcome.payload = dst; - continue; - } - - for(const Char * p = src; *p; ++p) { - - { - const Char input = *p; - auto it = std::lower_bound(expanded.inputs.begin(), expanded.inputs.end(), input); - if (it == expanded.inputs.end() || *it != input) { - expanded.inputs.insert(it, input); - } - } - - { - OutcomeType newType = p[1] ? OutcomeType::intermediate : OutcomeType::final; - terminalCount += !p[1]; - auto [it, inserted] = outcomesMap.emplace(std::piecewise_construct, - std::forward_as_tuple(src, p + 1), - std::forward_as_tuple(noTransition, newType, dst)); - if (!inserted) { - if (newType == OutcomeType::final) { - assert(it->second.type != OutcomeType::final); //if it already exists it must be non final! - it->second.payload = dst; - terminalCount -= (it->second.type != OutcomeType::intermediate); //avoid double counting! - it->second.type = OutcomeType::nonfinal; - } else if (it->second.type == OutcomeType::final) { - it->second.type = OutcomeType::nonfinal; - } - } - } - } - } - - assert(terminalCount < size_t(std::numeric_limits::max() - 1)); - - //Step 2: Write out empty outcome and non-intermediate outcomes. Populate outcome indices in map - - expanded.outcomes.reserve(terminalCount); - size_t stateCount = terminalCount; - expanded.outcomes.push_back({ - emptyOutcome.payload, - emptyOutcome.type != OutcomeType::intermediate, - emptyOutcome.type == OutcomeType::final - }); - for(auto & entry: outcomesMap) { - if (entry.second.type != OutcomeType::intermediate) { - entry.second.idx = static_cast(expanded.outcomes.size()); - expanded.outcomes.push_back({ - entry.second.payload, - true, - entry.second.type == OutcomeType::final - }); - } else { - entry.second.idx = stateCount++; - assert(stateCount < size_t(std::numeric_limits::max() - 1)); - } - } - assert(stateCount - 1 == outcomesMap.size()); - - //Step 3: Populate transitions table using input and outcome indices - - expanded.transitions.resize(stateCount * expanded.inputs.size(), -1); - - for(auto [dst, src]: range) { - - LengthType currentState = 0; - for(const Char * p = src; *p; ++p) { - - size_t inputIdx; - { - auto it = std::lower_bound(expanded.inputs.begin(), expanded.inputs.end(), *p); - assert(it != expanded.inputs.end() && *it == *p); - inputIdx = it - expanded.inputs.begin(); - } - - auto & nextState = expanded.transitions[currentState * expanded.inputs.size() + inputIdx]; - if (nextState != noTransition) { - currentState = nextState; - continue; - } - - auto it = outcomesMap.find({src, p + 1}); - assert(it != outcomesMap.end()); - nextState = static_cast(it->second.idx); - currentState = nextState; - } - } - - //Step 4: Compact everything into one memory block - - m_inputsEnd = expanded.inputs.size() * sizeof(expanded.inputs[0]); - m_transitionsStart = alignSize(m_inputsEnd, __alignof(expanded.transitions[0])); - m_transitionsEnd = m_transitionsStart + expanded.transitions.size() * sizeof(expanded.transitions[0]); - m_outcomesStart = alignSize(m_transitionsEnd, __alignof(expanded.outcomes[0])); - size_t compactSize = m_outcomesStart + expanded.outcomes.size() * sizeof(expanded.outcomes[0]); - m_data.resize(compactSize); - std::copy(expanded.inputs.begin(), expanded.inputs.end(), inputsBegin()); - std::copy(expanded.transitions.begin(), expanded.transitions.end(), transitionsBegin()); - std::copy(expanded.outcomes.begin(), expanded.outcomes.end(), outcomesBegin()); - } - - template - requires(std::is_convertible_v::iterator_category, std::input_iterator_tag> && - std::is_convertible_v::value_type>, PayloadType> && - std::is_convertible_v::value_type>, const Char *>) - StateMachine(ItF first, ItL last): - StateMachine(std::ranges::subrange(first, last)) - {} - - StateMachine(std::initializer_list> init): - StateMachine(init.begin(), init.end()) - {} - - StateMachine() = default; - - template - struct PrefixMatchResult { - /** - End of match - If !successful always stays at the start of input - */ - It next; - /** The payload of the successful match if successful. Undefined otherwise */ - PayloadType payload; - /** Whether the match was successfull. */ - bool successful; - /** Whether the answer is definite and won't change with larger input */ - bool definite; - }; - - template - requires(std::is_convertible_v::iterator_category, std::forward_iterator_tag> && - std::is_same_v::value_type, Char> && - std::equality_comparable_with) - auto prefixMatch(ItF first, ItL last) const noexcept -> PrefixMatchResult { - - LengthType currentState = 0; - LengthType lastMatchedState = 0; - auto current = first; - auto consumed = first; - while(current != last) { - - Char c = *current; - auto it = std::lower_bound(inputsBegin(), inputsEnd(), c); - if (it == inputsEnd() || *it != c) { - if (currentState >= outcomesSize()) - return {first, {}, false, true}; - auto & outcome = outcomesBegin()[currentState]; - return {current, outcome.payload, outcome.successful, true}; - } - size_t inputIdx = it - inputsBegin(); - - auto nextState = transitionFor(inputIdx, currentState); - if (nextState == noTransition) { - if (currentState >= outcomesSize()) - return {first, {}, false, true}; - auto & outcome = outcomesBegin()[currentState]; - return {current, outcome.payload, outcome.successful, true}; - } - - if (currentState < outcomesSize()) { - consumed = current; - lastMatchedState = currentState; - } - currentState = nextState; - ++current; - } - if (currentState >= outcomesSize()) { - if (lastMatchedState == 0) - return {first, {}, false, inputsSize() == 0}; - auto & outcome = outcomesBegin()[lastMatchedState]; - return {consumed, outcome.payload, outcome.successful, outcome.final}; - } - auto & outcome = outcomesBegin()[currentState]; - return {last, outcome.payload, outcome.successful, outcome.final}; - - } - -private: - // Round size up to next multiple of alignment. - static constexpr auto alignSize(size_t s, size_t alignment) noexcept -> size_t { - assert(s + alignment > s); - return (s + alignment - 1) & ~(alignment - 1); - } - - auto inputsBegin() const - { return reinterpret_cast(m_data.data()); } - auto inputsBegin() - { return reinterpret_cast(m_data.data()); } - auto inputsEnd() const - { return reinterpret_cast(m_data.data() + m_inputsEnd); } - auto inputsEnd() - { return reinterpret_cast(m_data.data() + m_inputsEnd); } - - auto transitionsBegin() const - { return reinterpret_cast(m_data.data() + m_transitionsStart); } - auto transitionsBegin() - { return reinterpret_cast(m_data.data() + m_transitionsStart); } - - auto outcomesBegin() const - { return reinterpret_cast(m_data.data() + m_outcomesStart); } - auto outcomesBegin() - { return reinterpret_cast(m_data.data() + m_outcomesStart); } - - - auto inputsSize() const -> LengthType - { return LengthType(m_inputsEnd / sizeof(Char)); } - auto transitionFor(LengthType inputIdx, LengthType state) const - { return transitionsBegin()[state * inputsSize() + inputIdx]; } - - auto outcomesSize() const -> LengthType - { return LengthType((m_data.size() - m_outcomesStart) / sizeof(Outcome)); } - -private: - std::vector m_data; - size_t m_inputsEnd = 0; - size_t m_transitionsStart = 0; - size_t m_transitionsEnd = 0; - size_t m_outcomesStart = 0; -}; - -#endif - - diff --git a/Translit/src/TableHE.hpp b/Translit/src/TableHE.hpp index 4355bc8..d91378f 100644 --- a/Translit/src/TableHE.hpp +++ b/Translit/src/TableHE.hpp @@ -4,39 +4,42 @@ #ifndef TRANSLIT_HEADER_TABLE_HE_HPP_INCLUDED #define TRANSLIT_HEADER_TABLE_HE_HPP_INCLUDED -constexpr std::pair g_tableHe[] = { - {u'א', u"a"}, - {u'ב', u"b"}, - {u'ב', u"v"}, - {u'ג', u"g"}, - {u'ד', u"d"}, - {u'ה', u"h"}, - {u'ו', u"o"}, - {u'ו', u"u"}, - {u'ז', u"z"}, - {u'ח', u"x"}, - {u'ט', u"T"}, - {u'י', u"i"}, - {u'י', u"j"}, - {u'כ', u"k"}, - {u'ך', u"K"}, - {u'ל', u"l"}, - {u'מ', u"m"}, - {u'ם', u"M"}, - {u'נ', u"n"}, - {u'ן', u"N"}, - {u'ס', u"s"}, - {u'ע', u"y"}, - {u'פ', u"f"}, - {u'פ', u"p"}, - {u'ף', u"F"}, - {u'ף', u"P"}, - {u'צ', u"c"}, - {u'ץ', u"C"}, - {u'ק', u"q"}, - {u'ר', u"r"}, - {u'ש', u"w"}, - {u'ת', u"t"} -}; +#include "Mapper.hpp" + +template +constexpr auto g_mapperHe = makePrefixMapper(); #endif diff --git a/Translit/src/TableRU.hpp b/Translit/src/TableRU.hpp index 5c36795..2078722 100644 --- a/Translit/src/TableRU.hpp +++ b/Translit/src/TableRU.hpp @@ -4,102 +4,105 @@ #ifndef TRANSLIT_HEADER_TABLE_RU_HPP_INCLUDED #define TRANSLIT_HEADER_TABLE_RU_HPP_INCLUDED -constexpr std::pair g_tableRu[] = { - {u'А', u"A"}, //1 - {u'а', u"a"}, - {u'Б', u"B"}, //2 - {u'б', u"b"}, - {u'В', u"V"}, //3 - {u'в', u"v"}, - {u'Г', u"G"}, //4 - {u'г', u"g"}, - {u'Д', u"D"}, //5 - {u'д', u"d"}, - {u'Е', u"E"}, //6 - {u'е', u"e"}, - {u'Ё', u"Ë"}, //7 - {u'Ё', u"Ö"}, - {u'Ё', u"JO"}, - {u'Ё', u"Jo"}, - {u'Ё', u"YO"}, - {u'Ё', u"Yo"}, - {u'ё', u"ë"}, - {u'ё', u"ö"}, - {u'ё', u"jo"}, - {u'ё', u"yo"}, - {u'Ж', u"ZH"}, //8 - {u'Ж', u"Zh"}, - {u'ж', u"zh"}, - {u'З', u"Z"}, //9 - {u'з', u"z"}, - {u'И', u"I"}, //10 - {u'и', u"i"}, - {u'Й', u"J"}, //11 - {u'й', u"j"}, - {u'К', u"K"}, //12 - {u'к', u"k"}, - {u'Л', u"L"}, //13 - {u'л', u"l"}, - {u'М', u"M"}, //14 - {u'м', u"m"}, - {u'Н', u"N"}, //15 - {u'н', u"n"}, - {u'О', u"O"}, //16 - {u'о', u"o"}, - {u'П', u"P"}, //17 - {u'п', u"p"}, - {u'Р', u"R"}, //18 - {u'р', u"r"}, - {u'С', u"S"}, //19 - {u'с', u"s"}, - {u'Т', u"T"}, //20 - {u'т', u"t"}, - {u'У', u"U"}, //21 - {u'у', u"u"}, - {u'Ф', u"F"}, //22 - {u'ф', u"f"}, - {u'Х', u"H"}, //23 - {u'Х', u"X"}, - {u'х', u"h"}, - {u'х', u"x"}, - {u'Ц', u"C"}, //24 - {u'ц', u"c"}, - {u'Ч', u"CH"}, //25 - {u'Ч', u"Ch"}, - {u'ч', u"ch"}, - {u'Ш', u"SH"}, //26 - {u'Ш', u"Sh"}, - {u'ш', u"sh"}, - {u'Щ', u"W"}, //27 - {u'Щ', u"SHH"}, - {u'Щ', u"SHh"}, - {u'Щ', u"Shh"}, - {u'щ', u"w"}, - {u'щ', u"shh"}, - {u'Ъ', u"QQ"}, //28 - {u'ъ', u"qq"}, - {u'Ы', u"Y"}, //29 - {u'ы', u"y"}, - {u'Ь', u"Q"}, //30 - {u'ь', u"q"}, - {u'Э', u"Ä"}, //31 - {u'Э', u"JE"}, - {u'Э', u"Je"}, - {u'э', u"je"}, - {u'Ю', u"Ü"}, //32 - {u'Ю', u"JU"}, - {u'Ю', u"Ju"}, - {u'Ю', u"YU"}, - {u'Ю', u"Yu"}, - {u'ю', u"ü"}, - {u'ю', u"ju"}, - {u'ю', u"yu"}, - {u'Я', u"JA"}, //33 - {u'Я', u"Ja"}, - {u'Я', u"YA"}, - {u'Я', u"Ya"}, - {u'я', u"ja"}, - {u'я', u"ya"} -}; +#include "Mapper.hpp" + +template +constexpr auto g_mapperRu = makePrefixMapper(); #endif diff --git a/Translit/src/Transliterator.cpp b/Translit/src/Transliterator.cpp index bec92bd..b1520ba 100644 --- a/Translit/src/Transliterator.cpp +++ b/Translit/src/Transliterator.cpp @@ -2,21 +2,33 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "Transliterator.hpp" +#include "TableRU.hpp" +#include "TableHE.hpp" +auto Transliterator::getMapper(const sys_string & name) -> MappingFunc * { + + static constexpr auto mapNameToMapper = makeMapper, + Mapping{(MappingFunc *)g_mapperRu, u"ru"}, + Mapping{(MappingFunc *)g_mapperHe, u"he"} + >(); + + return mapNameToMapper(sys_string::char_access(name)); +} void Transliterator::append(const sys_string & str) { sys_string::char_access strAccess(str); m_prefix.append(strAccess.begin(), strAccess.end()); m_translit.erase(m_translit.begin() + m_translitCompletedSize, m_translit.end()); - const auto begin = m_prefix.begin(); - const auto end = m_prefix.end(); + const auto begin = m_prefix.cbegin(); + const auto end = m_prefix.cend(); auto completed = begin; for (auto start = begin ; start != end; ) { - auto res = m_sm.prefixMatch(start, end); - if (res.successful) { + auto res = m_mapper(std::ranges::subrange(start, end)); + if (res.payload) { m_matchedSomething = true; - m_translit += res.payload; + m_translit += *res.payload; //if the result is not definite we don't know if a longer match is possible so bail out if (!res.definite) break; diff --git a/Translit/src/Transliterator.hpp b/Translit/src/Transliterator.hpp index 3844d1e..fea94d4 100644 --- a/Translit/src/Transliterator.hpp +++ b/Translit/src/Transliterator.hpp @@ -4,27 +4,26 @@ #ifndef TRANSLIT_HEADER_TRANSLITERATOR_HPP_INCLUDED #define TRANSLIT_HEADER_TRANSLITERATOR_HPP_INCLUDED -#include "StateMachine.hpp" +#include "Mapper.hpp" class Transliterator { -public: - using SizeType = unsigned short; private: - using StateMachineType = StateMachine; -public: - Transliterator() = default; + using Char = char16_t; + using String = std::basic_string; + using StringView = std::basic_string_view; + using Iterator = String::const_iterator; + using Range = std::ranges::subrange; + using MappingFunc = PrefixMappingResult (const Range &); - template - requires(std::is_convertible_v>, char16_t> && - std::is_convertible_v>, const char16_t *>) - Transliterator(Range && range): m_sm(range) +public: + Transliterator(const sys_string & name): m_mapper(getMapper(name)) {} void append(const sys_string & str); - auto result() const -> std::u16string_view + auto result() const -> StringView { return m_translit; } - auto completedSize() const -> SizeType + auto completedSize() const -> size_t { return m_translitCompletedSize; } auto matchedSomething() const -> bool { return m_matchedSomething; } @@ -44,12 +43,16 @@ class Transliterator { } private: - StateMachineType m_sm; + static auto getMapper(const sys_string & name) -> MappingFunc *; - std::u16string m_prefix; - std::u16string m_translit; - SizeType m_translitCompletedSize = 0; +private: + MappingFunc * m_mapper = nullPrefixMapper; + + String m_prefix; + String m_translit; + size_t m_translitCompletedSize = 0; bool m_matchedSomething = false; }; + #endif diff --git a/Translit/src/TransliteratorRegistry.cpp b/Translit/src/TransliteratorRegistry.cpp deleted file mode 100644 index d184cbc..0000000 --- a/Translit/src/TransliteratorRegistry.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) 2023, Eugene Gershnik -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "TransliteratorRegistry.hpp" -#include "TableRU.hpp" -#include "TableHE.hpp" - -static std::map g_transliterators { - { S("ru"), Transliterator(g_tableRu) }, - { S("he"), Transliterator(g_tableHe) } -}; - - -auto getTransliterator(const sys_string & name) -> Transliterator & { - return g_transliterators[name]; -} diff --git a/Translit/src/TransliteratorRegistry.hpp b/Translit/src/TransliteratorRegistry.hpp deleted file mode 100644 index e4ec01f..0000000 --- a/Translit/src/TransliteratorRegistry.hpp +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright (c) 2023, Eugene Gershnik -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef TRANSLIT_HEADER_TRANSLITERATOR_REGISTRY_HPP_INCLUDED -#define TRANSLIT_HEADER_TRANSLITERATOR_REGISTRY_HPP_INCLUDED - -#include "Transliterator.hpp" - - -auto getTransliterator(const sys_string & name) -> Transliterator &; - -#endif diff --git a/Translit/src/pch.hpp b/Translit/src/pch.hpp index f4860b5..390815f 100644 --- a/Translit/src/pch.hpp +++ b/Translit/src/pch.hpp @@ -7,11 +7,15 @@ #include #include +#include #include #include #include #include +#include #include +#include +#include #include #include diff --git a/Translit/tests/Main.xctestplan b/Translit/tests/Main.xctestplan index d9d6b05..edf54ed 100644 --- a/Translit/tests/Main.xctestplan +++ b/Translit/tests/Main.xctestplan @@ -9,17 +9,22 @@ } ], "defaultOptions" : { + "addressSanitizer" : { + "detectStackUseAfterReturn" : true, + "enabled" : true + }, "targetForVariableExpansion" : { "containerPath" : "container:Translit.xcodeproj", "identifier" : "445D1E462AFE2C6C00FA1C07", "name" : "Translit" }, - "testTimeoutsEnabled" : true + "testTimeoutsEnabled" : true, + "undefinedBehaviorSanitizerEnabled" : true }, "testTargets" : [ { "skippedTests" : [ - "TestStateMachine\/testPerformance" + "TestPerf" ], "target" : { "containerPath" : "container:Translit.xcodeproj", diff --git a/Translit/tests/Perf.xctestplan b/Translit/tests/Perf.xctestplan index 8f5e699..046a877 100644 --- a/Translit/tests/Perf.xctestplan +++ b/Translit/tests/Perf.xctestplan @@ -9,6 +9,7 @@ } ], "defaultOptions" : { + "codeCoverage" : false, "targetForVariableExpansion" : { "containerPath" : "container:Translit.xcodeproj", "identifier" : "445D1E462AFE2C6C00FA1C07", @@ -20,7 +21,7 @@ "testTargets" : [ { "selectedTests" : [ - "TestStateMachine\/testPerformance" + "TestPerf\/testPerformance" ], "target" : { "containerPath" : "container:Translit.xcodeproj", diff --git a/Translit/tests/TestCommon.hpp b/Translit/tests/TestCommon.hpp index 5e432b0..debcd0b 100644 --- a/Translit/tests/TestCommon.hpp +++ b/Translit/tests/TestCommon.hpp @@ -11,7 +11,7 @@ struct ResultPayload { ResultPayload() = default; ResultPayload(std::u16string_view all_, - Transliterator::SizeType completedSize_, + size_t completedSize_, bool matchedSomething_): all(all_), completedSize(completedSize_), @@ -19,7 +19,7 @@ struct ResultPayload { {} std::u16string_view all; - Transliterator::SizeType completedSize; + size_t completedSize; bool matchedSomething; friend bool operator==(const ResultPayload &, const ResultPayload &) = default; diff --git a/Translit/tests/TestMapper.mm b/Translit/tests/TestMapper.mm new file mode 100644 index 0000000..86603ce --- /dev/null +++ b/Translit/tests/TestMapper.mm @@ -0,0 +1,72 @@ +// Copyright (c) 2023, Eugene Gershnik +// SPDX-License-Identifier: GPL-3.0-or-later + +#import + +#include "../src/Mapper.hpp" + +using namespace std::literals; + +@interface TestMapper : XCTestCase + +@end + +@implementation TestMapper + + +- (void)testOnlyEmptyString { + auto mapper = makeMapper(); + XCTAssertEqual(mapper(u""s), 0); + XCTAssertEqual(mapper(u"a"s), 42); +} + +- (void)testDisjointStrings { + { + auto mapper = makeMapper(); + XCTAssertEqual(mapper(u""s), 42); + XCTAssertEqual(mapper(u"a"s), 2); + XCTAssertEqual(mapper(u"b"s), 0); + XCTAssertEqual(mapper(u"c"s), 1); + XCTAssertEqual(mapper(u" "s), 42); + } + + { + auto mapper = makeMapper(); + XCTAssertEqual(mapper(u""s), 42); + XCTAssertEqual(mapper(u"a"s), 42); + XCTAssertEqual(mapper(u"b"s), 42); + XCTAssertEqual(mapper(u"cd"s), 1); + } + +} + +- (void)testOverlappingStrings { + { + auto mapper = makeMapper(); + XCTAssertEqual(mapper(u"b"s), 0); + XCTAssertEqual(mapper(u"bc"s), 42); + XCTAssertEqual(mapper(u"bcd"s), 1); + } + { + auto mapper = makeMapper(); + XCTAssertEqual(mapper(u"b"s), 42); + XCTAssertEqual(mapper(u"bc"s), 0); + XCTAssertEqual(mapper(u"bd"s), 1); + XCTAssertEqual(mapper(u"bdd"s), 2); + } + { + auto mapper = makeMapper(); + XCTAssertEqual(mapper("bd"s), 0); + XCTAssertEqual(mapper("bddc"s), 42); + XCTAssertEqual(mapper("bddq"s), 1); + } +} + +- (void)testRepeatedStrings { + constexpr auto mapper = makeMapper(); + XCTAssertEqual(mapper("ab"sv), 2); + XCTAssertEqual(mapper("cd"sv), 1); +} + + +@end diff --git a/Translit/tests/TestPerf.mm b/Translit/tests/TestPerf.mm new file mode 100644 index 0000000..af5dde0 --- /dev/null +++ b/Translit/tests/TestPerf.mm @@ -0,0 +1,66 @@ +// Copyright (c) 2023, Eugene Gershnik +// SPDX-License-Identifier: GPL-3.0-or-later + +#import + +#include "../src/TableRU.hpp" + +@interface TestPerf : XCTestCase + +@end + +@implementation TestPerf + +- (void)testPerformance { + auto dataUrl = [[NSURL fileURLWithPath:@( __FILE__ )].URLByDeletingLastPathComponent URLByAppendingPathComponent:@"PerfData"]; + NSError * err; + auto nsstr = [NSString stringWithContentsOfURL:dataUrl encoding:NSUTF8StringEncoding error:&err]; + XCTAssertNil(err); + std::u16string str(nsstr.length, u'\0'); + [nsstr getCharacters:(unichar *)str.data()]; + + auto mapper = g_mapperRu>; + + __block volatile char16_t sink; + __block intptr_t diff = 0; + + [self measureBlock:^{ + auto begin = str.begin(); + auto end = str.end(); + auto completed = begin; + for (auto start = begin ; start != end; ) { + auto res = mapper(std::ranges::subrange(start, end)); + if (res.payload) { + //m_matchedSomething = true; + //m_translit += res.payload; + sink = *res.payload; + //if the result is not definite we don't know if a longer match is possible so bail out + if (!res.definite) + break; + //otherwise mark it as completed and continue + start = res.next; + //++m_translitCompletedSize; + completed = start; + } else if (!res.definite) { + //no match but could be with more input, bail out + //m_matchedSomething = true; + //m_translit.append(start, end); + for(auto it = start; it != end; ++it) + sink = *it; + break; + } else { + //no match and couldn't be + //consume 1 untranslated char and continue + //m_translit += *start; + sink = *start; + ++start; + //++m_translitCompletedSize; + completed = start; + } + } + diff = end - completed; + }]; + XCTAssertTrue(diff >= 0 && diff < 2); +} + +@end diff --git a/Translit/tests/TestPrefixMapper.mm b/Translit/tests/TestPrefixMapper.mm new file mode 100644 index 0000000..726f755 --- /dev/null +++ b/Translit/tests/TestPrefixMapper.mm @@ -0,0 +1,224 @@ +// Copyright (c) 2023, Eugene Gershnik +// SPDX-License-Identifier: GPL-3.0-or-later + +#import + +#include "../src/Mapper.hpp" + +using namespace std::literals; + +@interface TestPrefixMapper : XCTestCase + +@end + +@implementation TestPrefixMapper + +- (void)setUp { + // Put setup code here. This method is called before the invocation of each test method in the class. +} + +- (void)tearDown { + // Put teardown code here. This method is called after the invocation of each test method in the class. +} + +- (void)testEmpty { + auto mapper = nullPrefixMapper; + { + auto str = u""s; + auto res = mapper(str); + XCTAssertEqual(res.payload, std::nullopt); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin()); + } + { + auto str = u"a"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, std::nullopt); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin()); + } +} + +- (void)testOnlyEmptyString { + auto mapper = makePrefixMapper(); + { + auto str = u""s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 0); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin()); + } + { + auto str = u"a"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 0); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin()); + } +} + +- (void)testDisjointStrings { + { + + auto mapper = makePrefixMapper(); + { + auto str = u""s; + auto res = mapper(str); + XCTAssertEqual(res.payload, std::nullopt); + XCTAssertEqual(res.definite, false); + XCTAssertEqual(res.next, str.begin()); + } + { + auto str = u"a"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 2); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.end()); + } + { + auto str = u"b"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 0); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.end()); + } + { + auto str = u"c"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 1); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.end()); + } + { + auto str = u" "s; + auto res = mapper(str); + XCTAssertEqual(res.payload, std::nullopt); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin()); + } + } + + { + auto mapper = makePrefixMapper(); + { + auto str = u""s; + auto res = mapper(str); + XCTAssertEqual(res.payload, std::nullopt); + XCTAssertEqual(res.definite, false); + XCTAssertEqual(res.next, str.begin()); + } + { + auto str = u"a"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, std::nullopt); + XCTAssertEqual(res.definite, false); + XCTAssertEqual(res.next, str.begin()); + } + { + auto str = u"b"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, std::nullopt); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin()); + } + { + auto str = u"cd"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 1); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.end()); + } + } + +} + +- (void)testOverlappingStrings { + { + auto mapper = makePrefixMapper(); + { + auto str = u"b"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 0); + XCTAssertEqual(res.definite, false); + XCTAssertEqual(res.next, str.begin() + 1); + } + { + auto str = u"bc"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 0); + XCTAssertEqual(res.definite, false); + XCTAssertEqual(res.next, str.begin() + 1); + } + { + auto str = u"bcd"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 1); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin() + 3); + } + } + { + auto mapper = makePrefixMapper(); + { + auto str = u"b"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, std::nullopt); + XCTAssertEqual(res.definite, false); + XCTAssertEqual(res.next, str.begin()); + } + { + auto str = u"bc"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 0); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin() + 2); + } + { + auto str = u"bd"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 1); + XCTAssertEqual(res.definite, false); + XCTAssertEqual(res.next, str.begin() + 2); + } + { + auto str = u"bdd"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 2); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin() + 3); + } + } + { + auto mapper = makePrefixMapper(); + { + auto str = "bddc"s; + auto res = mapper(str); + XCTAssertEqual(res.payload, 0); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.begin() + 2); + } + + } +} + +- (void)testRepeatedStrings { + constexpr auto mapper = makePrefixMapper(); + + { + constexpr auto str = "ab"sv; + auto res = mapper(str); + XCTAssertEqual(res.payload, 2); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.end()); + } + { + constexpr auto str = "cd"sv; + auto res = mapper(str); + XCTAssertEqual(res.payload, 1); + XCTAssertEqual(res.definite, true); + XCTAssertEqual(res.next, str.end()); + } +} + + +@end diff --git a/Translit/tests/TestRu.mm b/Translit/tests/TestRu.mm index 702f102..203eee6 100644 --- a/Translit/tests/TestRu.mm +++ b/Translit/tests/TestRu.mm @@ -1,7 +1,6 @@ // Copyright (c) 2023, Eugene Gershnik // SPDX-License-Identifier: GPL-3.0-or-later -#include "../src/TransliteratorRegistry.hpp" #include "TestCommon.hpp" @@ -14,11 +13,11 @@ @interface TestRu : XCTestCase @implementation TestRu { - Transliterator * _tr; + std::unique_ptr _tr; } - (void)setUp { - _tr = &getTransliterator(S("ru")); + _tr = std::make_unique(S("ru")); } - (void)tearDown { diff --git a/Translit/tests/TestStateMachine.mm b/Translit/tests/TestStateMachine.mm deleted file mode 100644 index cbf6f08..0000000 --- a/Translit/tests/TestStateMachine.mm +++ /dev/null @@ -1,255 +0,0 @@ -// Copyright (c) 2023, Eugene Gershnik -// SPDX-License-Identifier: GPL-3.0-or-later - -#import - -#include "../src/StateMachine.hpp" -#include "../src/TableRU.hpp" - -@interface TestStateMachine : XCTestCase - -@end - -@implementation TestStateMachine - -- (void)setUp { - // Put setup code here. This method is called before the invocation of each test method in the class. -} - -- (void)tearDown { - // Put teardown code here. This method is called after the invocation of each test method in the class. -} - -- (void)testEmpty { - StateMachine sm; - { - const char * const str = ""; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, false); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str); - } - { - const char * const str = "a"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, false); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str); - } -} - -- (void)testOnlyEmptyString { - StateMachine sm({{0, ""}}); - { - const char * const str = ""; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 0); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str); - } - { - const char * str = "a"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 0); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str); - } -} - -- (void)testDisjointStrings { - { - - StateMachine sm({{0, "b"}, {1, "c"}, {2, "a"}}); - { - const char * const str = ""; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, false); - XCTAssertEqual(res.definite, false); - XCTAssertEqual(res.next, str); - } - { - const char * const str = "a"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 2); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str + strlen(str)); - } - { - const char * const str = "b"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 0); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str + strlen(str)); - } - { - const char * const str = "c"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 1); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str + strlen(str)); - } - { - const char * const str = " "; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, false); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str); - } - } - - { - StateMachine sm({{0, "ef"}, {1, "cd"}, {2, "ab"}}); - { - const char * const str = ""; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, false); - XCTAssertEqual(res.definite, false); - XCTAssertEqual(res.next, str); - } - { - const char * const str = "a"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, false); - XCTAssertEqual(res.definite, false); - XCTAssertEqual(res.next, str); - } - { - const char * const str = "b"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, false); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str); - } - { - const char * const str = "cd"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 1); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str + strlen(str)); - } - } - -} - -- (void)testOverlappingStrings { - { - StateMachine sm({{0, "b"}, {1, "bcd"}}); - { - const char * const str = "b"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 0); - XCTAssertEqual(res.definite, false); - XCTAssertEqual(res.next, str + 1); - } - { - const char * const str = "bc"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 0); - XCTAssertEqual(res.definite, false); - XCTAssertEqual(res.next, str + 1); - } - { - const char * const str = "bcd"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 1); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str + 3); - } - } - { - StateMachine sm({{0, "bc"}, {1, "bd"}, {2, "bdd"}}); - { - const char * const str = "b"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, false); - XCTAssertEqual(res.definite, false); - XCTAssertEqual(res.next, str); - } - { - const char * const str = "bc"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 0); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str + 2); - } - { - const char * const str = "bd"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 1); - XCTAssertEqual(res.definite, false); - XCTAssertEqual(res.next, str + 2); - } - { - const char * const str = "bdd"; - auto res = sm.prefixMatch(str, str + strlen(str)); - XCTAssertEqual(res.successful, true); - XCTAssertEqual(res.payload, 2); - XCTAssertEqual(res.definite, true); - XCTAssertEqual(res.next, str + 3); - } - } -} - -- (void)testPerformance { - auto dataUrl = [[NSURL fileURLWithPath:@( __FILE__ )].URLByDeletingLastPathComponent URLByAppendingPathComponent:@"PerfData"]; - NSError * err; - auto str = sys_string((NSString *)[NSString stringWithContentsOfURL:dataUrl encoding:NSUTF8StringEncoding error:&err]); - XCTAssertNil(err); - - StateMachine sm(g_tableRu); - - __block volatile char16_t sink; - __block intptr_t diff = 0; - - [self measureBlock:^{ - auto acess = sys_string::char_access(str); - auto begin = acess.begin(); - auto end = acess.end(); - auto completed = begin; - for (auto start = begin ; start != end; ) { - auto res = sm.prefixMatch(start, end); - if (res.successful) { - //m_matchedSomething = true; - //m_translit += res.payload; - sink = res.payload; - //if the result is not definite we don't know if a longer match is possible so bail out - if (!res.definite) - break; - //otherwise mark it as completed and continue - start = res.next; - //++m_translitCompletedSize; - completed = start; - } else if (!res.definite) { - //no match but could be with more input, bail out - //m_matchedSomething = true; - //m_translit.append(start, end); - for(auto it = start; it != end; ++it) - sink = *it; - break; - } else { - //no match and couldn't be - //consume 1 untranslated char and continue - //m_translit += *start; - sink = *start; - ++start; - //++m_translitCompletedSize; - completed = start; - } - } - diff = end - completed; - }]; - XCTAssertTrue(diff >= 0 && diff < 2); -} - -@end