LCOV - code coverage report
Current view: top level - core/search - SPSearchDistance.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 13 16 81.2 %
Date: 2024-05-08 02:30:24 Functions: 2 2 100.0 %

          Line data    Source code
       1             : /**
       2             : Copyright (c) 2017-2022 Roman Katuntsev <sbkarr@stappler.org>
       3             : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
       4             : 
       5             : Permission is hereby granted, free of charge, to any person obtaining a copy
       6             : of this software and associated documentation files (the "Software"), to deal
       7             : in the Software without restriction, including without limitation the rights
       8             : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
       9             : copies of the Software, and to permit persons to whom the Software is
      10             : furnished to do so, subject to the following conditions:
      11             : 
      12             : The above copyright notice and this permission notice shall be included in
      13             : all copies or substantial portions of the Software.
      14             : 
      15             : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      16             : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      17             : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      18             : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      19             : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      20             : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
      21             : THE SOFTWARE.
      22             : **/
      23             : 
      24             : #ifndef STAPPLER_SEARCH_SPSEARCHDISTANCE_H_
      25             : #define STAPPLER_SEARCH_SPSEARCHDISTANCE_H_
      26             : 
      27             : #include "SPStringView.h"
      28             : #include "SPMemory.h"
      29             : 
      30             : namespace STAPPLER_VERSIONIZED stappler::search {
      31             : 
      32             : using namespace mem_pool;
      33             : 
      34             : // Edit (Levenshtein) Distance calculation and alignment, used by search index and transforms
      35             : // See: https://en.wikipedia.org/wiki/Levenshtein_distance
      36             : 
      37             : class Distance : public memory::AllocPool {
      38             : public:
      39             :         enum class Value : uint8_t {
      40             :                 Match,
      41             :                 Insert,
      42             :                 Delete,
      43             :                 Replace
      44             :         };
      45             : 
      46             :         class Storage : public memory::AllocPool {
      47             :         public:
      48             :                 struct Struct {
      49             :                         uint8_t v1 : 2;
      50             :                         uint8_t v2 : 2;
      51             :                         uint8_t v3 : 2;
      52             :                         uint8_t v4 : 2;
      53             : 
      54      101240 :                         void set(uint8_t idx, Value value) {
      55      101240 :                                 switch (idx) {
      56       25314 :                                 case 0: v1 = toInt(value); break;
      57       25311 :                                 case 1: v2 = toInt(value); break;
      58       25309 :                                 case 2: v3 = toInt(value); break;
      59       25306 :                                 case 3: v4 = toInt(value); break;
      60           0 :                                 default: break;
      61             :                                 }
      62      101240 :                         }
      63       12035 :                         Value get(uint8_t idx) const {
      64       12035 :                                 switch (idx) {
      65        3010 :                                 case 0: return Value(v1); break;
      66        3008 :                                 case 1: return Value(v2); break;
      67        3008 :                                 case 2: return Value(v3); break;
      68        3009 :                                 case 3: return Value(v4); break;
      69           0 :                                 default: break;
      70             :                                 }
      71           0 :                                 return Value::Match;
      72             :                         }
      73             :                 };
      74             : 
      75             :                 struct Size {
      76             :                         size_t size : sizeof(size_t) * 8 - 1;
      77             :                         size_t vec : 1;
      78             :                 };
      79             : 
      80             :                 using Array = std::array<Struct, sizeof(Bytes) / sizeof(Struct)>;
      81             :                 using Vec = Vector<Struct>;
      82             : 
      83             :                 static Storage merge(const Storage &, const Storage &);
      84             : 
      85             :                 Storage() noexcept;
      86             :                 ~Storage() noexcept;
      87             : 
      88             :                 Storage(const Storage &) noexcept;
      89             :                 Storage(Storage &&) noexcept;
      90             : 
      91             :                 Storage &operator=(const Storage &) noexcept;
      92             :                 Storage &operator=(Storage &&) noexcept;
      93             : 
      94             :                 bool empty() const;
      95             :                 size_t size() const;
      96             :                 size_t capacity() const;
      97             : 
      98             :                 void reserve(size_t);
      99             :                 void emplace_back(Value);
     100             :                 void reverse();
     101             : 
     102             :                 Value at(size_t) const;
     103             :                 void set(size_t, Value);
     104             : 
     105             :                 void clear();
     106             : 
     107             :         protected:
     108             :                 bool isVecStorage() const;
     109             :                 bool isVecStorage(size_t) const;
     110             : 
     111             :                 Size _size;
     112             :                 union {
     113             :                         Array _array;
     114             :                         Vec _bytes;
     115             :                 };
     116             :         };
     117             : 
     118             :         Distance() noexcept;
     119             :         Distance(const StringView &origin, const StringView &canonical, size_t maxDistance = maxOf<size_t>());
     120             : 
     121             :         Distance(const Distance &) noexcept;
     122             :         Distance(Distance &&) noexcept;
     123             : 
     124             :         Distance &operator=(const Distance &) noexcept;
     125             :         Distance &operator=(Distance &&) noexcept;
     126             : 
     127             :         bool empty() const;
     128             : 
     129             :         size_t size() const;
     130             : 
     131             :         // calculate position difference from canonical to original
     132             :         int32_t diff_original(size_t pos, bool forward = false) const;
     133             : 
     134             :         // calculate position difference from original to canonical
     135             :         int32_t diff_canonical(size_t pos, bool forward = false) const;
     136             : 
     137             :         size_t nmatch() const;
     138             : 
     139             :         memory::string info() const;
     140             : 
     141             : protected:
     142             :         uint32_t _distance = 0;
     143             :         Storage _storage;
     144             : };
     145             : 
     146             : }
     147             : 
     148             : #endif /* STAPPLER_SEARCH_SPSEARCHDISTANCE_H_ */

Generated by: LCOV version 1.14