| 1 | /******************************************************************************* | |
| 2 | * Copyright (C) 2026, Leo Galambos | |
| 3 | * All rights reserved. | |
| 4 | * | |
| 5 | * Redistribution and use in source and binary forms, with or without | |
| 6 | * modification, are permitted provided that the following conditions are met: | |
| 7 | * | |
| 8 | * 1. Redistributions of source code must retain the above copyright notice, | |
| 9 | * this list of conditions and the following disclaimer. | |
| 10 | * | |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright notice, | |
| 12 | * this list of conditions and the following disclaimer in the documentation | |
| 13 | * and/or other materials provided with the distribution. | |
| 14 | * | |
| 15 | * 3. Neither the name of the copyright holder nor the names of its contributors | |
| 16 | * may be used to endorse or promote products derived from this software | |
| 17 | * without specific prior written permission. | |
| 18 | * | |
| 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| 20 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | |
| 23 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
| 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
| 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
| 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| 29 | * POSSIBILITY OF SUCH DAMAGE. | |
| 30 | ******************************************************************************/ | |
| 31 | package org.egothor.stemmer; | |
| 32 | ||
| 33 | import java.util.HashMap; | |
| 34 | import java.util.Map; | |
| 35 | import java.util.Objects; | |
| 36 | ||
| 37 | /** | |
| 38 | * Immutable metadata persisted together with a compiled trie artifact. | |
| 39 | * | |
| 40 | * <p> | |
| 41 | * The metadata captures the semantic build configuration required to interpret | |
| 42 | * the compiled trie correctly after it is reloaded. Persisting the metadata as | |
| 43 | * part of the artifact makes the binary format self-describing and avoids | |
| 44 | * coupling runtime consumers to external side-channel configuration. | |
| 45 | * </p> | |
| 46 | * | |
| 47 | * <p> | |
| 48 | * The record is intentionally extensible. It already models traversal | |
| 49 | * direction, reduction settings, and diacritic processing strategy, even though | |
| 50 | * not every field necessarily influences all current code paths yet. | |
| 51 | * </p> | |
| 52 | * | |
| 53 | * @param formatVersion persisted binary format version of the trie | |
| 54 | * artifact | |
| 55 | * @param traversalDirection logical key traversal direction | |
| 56 | * @param reductionSettings reduction settings used during compilation | |
| 57 | * @param diacriticProcessingMode diacritic processing strategy associated with | |
| 58 | * the artifact | |
| 59 | * @param caseProcessingMode case processing strategy associated with the | |
| 60 | * artifact | |
| 61 | */ | |
| 62 | public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDirection, | |
| 63 | ReductionSettings reductionSettings, DiacriticProcessingMode diacriticProcessingMode, | |
| 64 | CaseProcessingMode caseProcessingMode) { | |
| 65 | /** | |
| 66 | * Header identifying the human-readable metadata block layout. | |
| 67 | */ | |
| 68 | private static final String TEXT_BLOCK_HEADER = "radixor.metadata.v1"; | |
| 69 | ||
| 70 | /** | |
| 71 | * Creates a new metadata instance. | |
| 72 | * | |
| 73 | * @param formatVersion persisted binary format version, must be at | |
| 74 | * least {@code 1} | |
| 75 | * @param traversalDirection logical key traversal direction | |
| 76 | * @param reductionSettings reduction settings used during compilation | |
| 77 | * @param diacriticProcessingMode diacritic processing strategy | |
| 78 | * @param caseProcessingMode case processing strategy | |
| 79 | */ | |
| 80 | public TrieMetadata(final int formatVersion, final WordTraversalDirection traversalDirection, | |
| 81 | final ReductionSettings reductionSettings, final DiacriticProcessingMode diacriticProcessingMode, | |
| 82 | final CaseProcessingMode caseProcessingMode) { | |
| 83 | if (formatVersion < 1) { // NOPMD | |
| 84 | throw new IllegalArgumentException("formatVersion must be at least 1."); | |
| 85 | } | |
| 86 | this.formatVersion = formatVersion; | |
| 87 | this.traversalDirection = Objects.requireNonNull(traversalDirection, "traversalDirection"); | |
| 88 | this.reductionSettings = Objects.requireNonNull(reductionSettings, "reductionSettings"); | |
| 89 | this.diacriticProcessingMode = Objects.requireNonNull(diacriticProcessingMode, "diacriticProcessingMode"); | |
| 90 | this.caseProcessingMode = Objects.requireNonNull(caseProcessingMode, "caseProcessingMode"); | |
| 91 | } | |
| 92 | ||
| 93 | /** | |
| 94 | * Creates metadata populated with current-format defaults for freshly compiled | |
| 95 | * tries. | |
| 96 | * | |
| 97 | * @param formatVersion persisted binary format version | |
| 98 | * @param traversalDirection logical key traversal direction | |
| 99 | * @param reductionSettings reduction settings used during compilation | |
| 100 | * @return metadata initialized with current defaults | |
| 101 | */ | |
| 102 | public static TrieMetadata current(final int formatVersion, final WordTraversalDirection traversalDirection, | |
| 103 | final ReductionSettings reductionSettings) { | |
| 104 |
1
1. current : replaced return value with null for org/egothor/stemmer/TrieMetadata::current → NO_COVERAGE |
return new TrieMetadata(formatVersion, traversalDirection, reductionSettings, DiacriticProcessingMode.AS_IS, |
| 105 | CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT); | |
| 106 | } | |
| 107 | ||
| 108 | /** | |
| 109 | * Creates metadata for a newly compiled trie using the currently persisted | |
| 110 | * binary stream format version. | |
| 111 | * | |
| 112 | * @param traversalDirection logical key traversal direction | |
| 113 | * @param reductionSettings reduction settings used during compilation | |
| 114 | * @param diacriticProcessingMode diacritic processing strategy | |
| 115 | * @param caseProcessingMode case processing strategy | |
| 116 | * @return metadata aligned with the current persisted stream format | |
| 117 | */ | |
| 118 | public static TrieMetadata forCompilation(final WordTraversalDirection traversalDirection, | |
| 119 | final ReductionSettings reductionSettings, final DiacriticProcessingMode diacriticProcessingMode, | |
| 120 | final CaseProcessingMode caseProcessingMode) { | |
| 121 |
1
1. forCompilation : replaced return value with null for org/egothor/stemmer/TrieMetadata::forCompilation → KILLED |
return new TrieMetadata(FrequencyTrie.currentFormatVersion(), traversalDirection, reductionSettings, |
| 122 | diacriticProcessingMode, caseProcessingMode); | |
| 123 | } | |
| 124 | ||
| 125 | /** | |
| 126 | * Creates metadata compatible with a legacy artifact version that did not store | |
| 127 | * the full configuration explicitly. | |
| 128 | * | |
| 129 | * @param formatVersion legacy persisted binary format version | |
| 130 | * @param traversalDirection logical key traversal direction reconstructed from | |
| 131 | * the legacy stream | |
| 132 | * @return metadata reconstructed with conservative compatibility defaults | |
| 133 | */ | |
| 134 | public static TrieMetadata legacy(final int formatVersion, final WordTraversalDirection traversalDirection) { | |
| 135 |
1
1. legacy : replaced return value with null for org/egothor/stemmer/TrieMetadata::legacy → SURVIVED |
return new TrieMetadata(formatVersion, traversalDirection, |
| 136 | ReductionSettings.withDefaults(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS), | |
| 137 | DiacriticProcessingMode.AS_IS, CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT); | |
| 138 | } | |
| 139 | ||
| 140 | /** | |
| 141 | * Returns metadata encoded as a deterministic human-readable text block. | |
| 142 | * | |
| 143 | * <p> | |
| 144 | * The format intentionally uses plain {@code key=value} lines so users can | |
| 145 | * inspect metadata quickly from a decompressed trie payload without additional | |
| 146 | * dependencies. | |
| 147 | * </p> | |
| 148 | * | |
| 149 | * @return persisted metadata text block | |
| 150 | */ | |
| 151 | @SuppressWarnings("PMD.ConsecutiveLiteralAppends") | |
| 152 | public String toTextBlock() { | |
| 153 | final StringBuilder textBlockBuilder = new StringBuilder(1024); | |
| 154 | textBlockBuilder.append(TEXT_BLOCK_HEADER).append('\n') | |
| 155 | // | |
| 156 | .append("formatVersion=").append(this.formatVersion).append('\n') | |
| 157 | // | |
| 158 | .append("traversalDirection=").append(this.traversalDirection.name()).append('\n') | |
| 159 | // | |
| 160 |
1
1. toTextBlock : negated conditional → KILLED |
.append("rightToLeft=").append(this.traversalDirection == WordTraversalDirection.FORWARD).append('\n') |
| 161 | // | |
| 162 | .append("reductionMode=").append(this.reductionSettings.reductionMode().name()).append('\n') | |
| 163 | // | |
| 164 | .append("dominantWinnerMinPercent=").append(this.reductionSettings.dominantWinnerMinPercent()) | |
| 165 | .append('\n') | |
| 166 | // | |
| 167 | .append("dominantWinnerOverSecondRatio=").append(this.reductionSettings.dominantWinnerOverSecondRatio()) | |
| 168 | .append('\n') | |
| 169 | // | |
| 170 | .append("diacriticProcessingMode=").append(this.diacriticProcessingMode.name()).append('\n') | |
| 171 | // | |
| 172 | .append("caseProcessingMode=").append(this.caseProcessingMode.name()).append('\n'); | |
| 173 |
1
1. toTextBlock : replaced return value with "" for org/egothor/stemmer/TrieMetadata::toTextBlock → KILLED |
return textBlockBuilder.toString(); |
| 174 | } | |
| 175 | ||
| 176 | /** | |
| 177 | * Parses metadata from a text block produced by {@link #toTextBlock()}. | |
| 178 | * | |
| 179 | * @param formatVersion persisted binary format version | |
| 180 | * @param textBlock metadata text block | |
| 181 | * @return parsed metadata | |
| 182 | */ | |
| 183 | public static TrieMetadata fromTextBlock(final int formatVersion, final String textBlock) { | |
| 184 | Objects.requireNonNull(textBlock, "textBlock"); | |
| 185 | ||
| 186 | final String[] lines = textBlock.split("\\R"); | |
| 187 |
2
1. fromTextBlock : negated conditional → KILLED 2. fromTextBlock : negated conditional → KILLED |
if (lines.length == 0 || !TEXT_BLOCK_HEADER.equals(lines[0])) { |
| 188 | throw new IllegalArgumentException("Unsupported metadata block header."); | |
| 189 | } | |
| 190 | ||
| 191 | final Map<String, String> entries = new HashMap<>(); | |
| 192 |
2
1. fromTextBlock : negated conditional → KILLED 2. fromTextBlock : changed conditional boundary → KILLED |
for (int index = 1; index < lines.length; index++) { |
| 193 | final String line = lines[index]; | |
| 194 |
1
1. fromTextBlock : negated conditional → KILLED |
if (line.isBlank()) { |
| 195 | continue; | |
| 196 | } | |
| 197 | final int delimiterIndex = line.indexOf('='); | |
| 198 |
4
1. fromTextBlock : Replaced integer subtraction with addition → SURVIVED 2. fromTextBlock : changed conditional boundary → SURVIVED 3. fromTextBlock : negated conditional → KILLED 4. fromTextBlock : negated conditional → KILLED |
if (delimiterIndex <= 0 || delimiterIndex == line.length() - 1) { |
| 199 | throw new IllegalArgumentException("Invalid metadata line: " + line); | |
| 200 | } | |
| 201 |
1
1. fromTextBlock : Replaced integer addition with subtraction → KILLED |
entries.put(line.substring(0, delimiterIndex), line.substring(delimiterIndex + 1)); |
| 202 | } | |
| 203 | ||
| 204 | final WordTraversalDirection traversalDirection = WordTraversalDirection | |
| 205 | .valueOf(requireEntry(entries, "traversalDirection")); | |
| 206 | final ReductionMode reductionMode = ReductionMode.valueOf(requireEntry(entries, "reductionMode")); | |
| 207 | final int dominantWinnerMinPercent = Integer.parseInt(requireEntry(entries, "dominantWinnerMinPercent")); | |
| 208 | final int dominantWinnerOverSecondRatio = Integer // NOPMD | |
| 209 | .parseInt(requireEntry(entries, "dominantWinnerOverSecondRatio")); | |
| 210 | final DiacriticProcessingMode diacriticProcessingMode = DiacriticProcessingMode | |
| 211 | .valueOf(requireEntry(entries, "diacriticProcessingMode")); | |
| 212 | final CaseProcessingMode caseProcessingMode = CaseProcessingMode | |
| 213 | .valueOf(requireEntry(entries, "caseProcessingMode")); | |
| 214 | ||
| 215 |
1
1. fromTextBlock : replaced return value with null for org/egothor/stemmer/TrieMetadata::fromTextBlock → KILLED |
return new TrieMetadata(formatVersion, traversalDirection, |
| 216 | new ReductionSettings(reductionMode, dominantWinnerMinPercent, dominantWinnerOverSecondRatio), | |
| 217 | diacriticProcessingMode, caseProcessingMode); | |
| 218 | } | |
| 219 | ||
| 220 | /** | |
| 221 | * Returns a required metadata entry from a parsed text block. | |
| 222 | * | |
| 223 | * @param entries parsed metadata entries | |
| 224 | * @param key required entry key | |
| 225 | * @return non-blank entry value | |
| 226 | * @throws IllegalArgumentException if the entry is absent or blank | |
| 227 | */ | |
| 228 | private static String requireEntry(final Map<String, String> entries, final String key) { | |
| 229 | final String value = entries.get(key); | |
| 230 |
2
1. requireEntry : negated conditional → KILLED 2. requireEntry : negated conditional → KILLED |
if (value == null || value.isBlank()) { |
| 231 | throw new IllegalArgumentException("Missing metadata entry: " + key); | |
| 232 | } | |
| 233 |
1
1. requireEntry : replaced return value with "" for org/egothor/stemmer/TrieMetadata::requireEntry → KILLED |
return value; |
| 234 | } | |
| 235 | } | |
Mutations | ||
| 104 |
1.1 |
|
| 121 |
1.1 |
|
| 135 |
1.1 |
|
| 160 |
1.1 |
|
| 173 |
1.1 |
|
| 187 |
1.1 2.2 |
|
| 192 |
1.1 2.2 |
|
| 194 |
1.1 |
|
| 198 |
1.1 2.2 3.3 4.4 |
|
| 201 |
1.1 |
|
| 215 |
1.1 |
|
| 230 |
1.1 2.2 |
|
| 233 |
1.1 |