Line data Source code
1 : /** 2 : Copyright (c) 2017-2022 Roman Katuntsev <sbkarr@stappler.org> 3 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev> 4 : 5 : Permission is hereby granted, free of charge, to any person obtaining a copy 6 : of this software and associated documentation files (the "Software"), to deal 7 : in the Software without restriction, including without limitation the rights 8 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 : copies of the Software, and to permit persons to whom the Software is 10 : furnished to do so, subject to the following conditions: 11 : 12 : The above copyright notice and this permission notice shall be included in 13 : all copies or substantial portions of the Software. 14 : 15 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 : THE SOFTWARE. 22 : **/ 23 : 24 : #ifndef STAPPLER_SEARCH_SPSEARCHDISTANCE_H_ 25 : #define STAPPLER_SEARCH_SPSEARCHDISTANCE_H_ 26 : 27 : #include "SPStringView.h" 28 : #include "SPMemory.h" 29 : 30 : namespace STAPPLER_VERSIONIZED stappler::search { 31 : 32 : using namespace mem_pool; 33 : 34 : // Edit (Levenshtein) Distance calculation and alignment, used by search index and transforms 35 : // See: https://en.wikipedia.org/wiki/Levenshtein_distance 36 : 37 : class Distance : public memory::AllocPool { 38 : public: 39 : enum class Value : uint8_t { 40 : Match, 41 : Insert, 42 : Delete, 43 : Replace 44 : }; 45 : 46 : class Storage : public memory::AllocPool { 47 : public: 48 : struct Struct { 49 : uint8_t v1 : 2; 50 : uint8_t v2 : 2; 51 : uint8_t v3 : 2; 52 : uint8_t v4 : 2; 53 : 54 101240 : void set(uint8_t idx, Value value) { 55 101240 : switch (idx) { 56 25314 : case 0: v1 = toInt(value); break; 57 25311 : case 1: v2 = toInt(value); break; 58 25309 : case 2: v3 = toInt(value); break; 59 25306 : case 3: v4 = toInt(value); break; 60 0 : default: break; 61 : } 62 101240 : } 63 12035 : Value get(uint8_t idx) const { 64 12035 : switch (idx) { 65 3010 : case 0: return Value(v1); break; 66 3008 : case 1: return Value(v2); break; 67 3008 : case 2: return Value(v3); break; 68 3009 : case 3: return Value(v4); break; 69 0 : default: break; 70 : } 71 0 : return Value::Match; 72 : } 73 : }; 74 : 75 : struct Size { 76 : size_t size : sizeof(size_t) * 8 - 1; 77 : size_t vec : 1; 78 : }; 79 : 80 : using Array = std::array<Struct, sizeof(Bytes) / sizeof(Struct)>; 81 : using Vec = Vector<Struct>; 82 : 83 : static Storage merge(const Storage &, const Storage &); 84 : 85 : Storage() noexcept; 86 : ~Storage() noexcept; 87 : 88 : Storage(const Storage &) noexcept; 89 : Storage(Storage &&) noexcept; 90 : 91 : Storage &operator=(const Storage &) noexcept; 92 : Storage &operator=(Storage &&) noexcept; 93 : 94 : bool empty() const; 95 : size_t size() const; 96 : size_t capacity() const; 97 : 98 : void reserve(size_t); 99 : void emplace_back(Value); 100 : void reverse(); 101 : 102 : Value at(size_t) const; 103 : void set(size_t, Value); 104 : 105 : void clear(); 106 : 107 : protected: 108 : bool isVecStorage() const; 109 : bool isVecStorage(size_t) const; 110 : 111 : Size _size; 112 : union { 113 : Array _array; 114 : Vec _bytes; 115 : }; 116 : }; 117 : 118 : Distance() noexcept; 119 : Distance(const StringView &origin, const StringView &canonical, size_t maxDistance = maxOf<size_t>()); 120 : 121 : Distance(const Distance &) noexcept; 122 : Distance(Distance &&) noexcept; 123 : 124 : Distance &operator=(const Distance &) noexcept; 125 : Distance &operator=(Distance &&) noexcept; 126 : 127 : bool empty() const; 128 : 129 : size_t size() const; 130 : 131 : // calculate position difference from canonical to original 132 : int32_t diff_original(size_t pos, bool forward = false) const; 133 : 134 : // calculate position difference from original to canonical 135 : int32_t diff_canonical(size_t pos, bool forward = false) const; 136 : 137 : size_t nmatch() const; 138 : 139 : memory::string info() const; 140 : 141 : protected: 142 : uint32_t _distance = 0; 143 : Storage _storage; 144 : }; 145 : 146 : } 147 : 148 : #endif /* STAPPLER_SEARCH_SPSEARCHDISTANCE_H_ */