TrieMetadata.java

/*******************************************************************************
 * Copyright (C) 2026, Leo Galambos
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************/
31
package org.egothor.stemmer;
32
33
import java.util.HashMap;
34
import java.util.Map;
35
import java.util.Objects;
36
37
/**
38
 * Immutable metadata persisted together with a compiled trie artifact.
39
 *
40
 * <p>
41
 * The metadata captures the semantic build configuration required to interpret
42
 * the compiled trie correctly after it is reloaded. Persisting the metadata as
43
 * part of the artifact makes the binary format self-describing and avoids
44
 * coupling runtime consumers to external side-channel configuration.
45
 * </p>
46
 *
47
 * <p>
48
 * The record is intentionally extensible. It already models traversal
49
 * direction, reduction settings, and diacritic processing strategy, even though
50
 * not every field necessarily influences all current code paths yet.
51
 * </p>
52
 *
53
 * @param formatVersion           persisted binary format version of the trie
54
 *                                artifact
55
 * @param traversalDirection      logical key traversal direction
56
 * @param reductionSettings       reduction settings used during compilation
57
 * @param diacriticProcessingMode diacritic processing strategy associated with
58
 *                                the artifact
59
 * @param caseProcessingMode      case processing strategy associated with the
60
 *                                artifact
61
 */
62
public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDirection,
63
        ReductionSettings reductionSettings, DiacriticProcessingMode diacriticProcessingMode,
64
        CaseProcessingMode caseProcessingMode) {
65
    /**
66
     * Header identifying the human-readable metadata block layout.
67
     */
68
    private static final String TEXT_BLOCK_HEADER = "radixor.metadata.v1";
69
70
    /**
71
     * Creates a new metadata instance.
72
     *
73
     * @param formatVersion           persisted binary format version, must be at
74
     *                                least {@code 1}
75
     * @param traversalDirection      logical key traversal direction
76
     * @param reductionSettings       reduction settings used during compilation
77
     * @param diacriticProcessingMode diacritic processing strategy
78
     * @param caseProcessingMode      case processing strategy
79
     */
80
    public TrieMetadata(final int formatVersion, final WordTraversalDirection traversalDirection,
81
            final ReductionSettings reductionSettings, final DiacriticProcessingMode diacriticProcessingMode,
82
            final CaseProcessingMode caseProcessingMode) {
83
        if (formatVersion < 1) { // NOPMD
84
            throw new IllegalArgumentException("formatVersion must be at least 1.");
85
        }
86
        this.formatVersion = formatVersion;
87
        this.traversalDirection = Objects.requireNonNull(traversalDirection, "traversalDirection");
88
        this.reductionSettings = Objects.requireNonNull(reductionSettings, "reductionSettings");
89
        this.diacriticProcessingMode = Objects.requireNonNull(diacriticProcessingMode, "diacriticProcessingMode");
90
        this.caseProcessingMode = Objects.requireNonNull(caseProcessingMode, "caseProcessingMode");
91
    }
92
93
    /**
94
     * Creates metadata populated with current-format defaults for freshly compiled
95
     * tries.
96
     *
97
     * @param formatVersion      persisted binary format version
98
     * @param traversalDirection logical key traversal direction
99
     * @param reductionSettings  reduction settings used during compilation
100
     * @return metadata initialized with current defaults
101
     */
102
    public static TrieMetadata current(final int formatVersion, final WordTraversalDirection traversalDirection,
103
            final ReductionSettings reductionSettings) {
104 1 1. current : replaced return value with null for org/egothor/stemmer/TrieMetadata::current → NO_COVERAGE
        return new TrieMetadata(formatVersion, traversalDirection, reductionSettings, DiacriticProcessingMode.AS_IS,
105
                CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
106
    }
107
108
    /**
109
     * Creates metadata for a newly compiled trie using the currently persisted
110
     * binary stream format version.
111
     *
112
     * @param traversalDirection      logical key traversal direction
113
     * @param reductionSettings       reduction settings used during compilation
114
     * @param diacriticProcessingMode diacritic processing strategy
115
     * @param caseProcessingMode      case processing strategy
116
     * @return metadata aligned with the current persisted stream format
117
     */
118
    public static TrieMetadata forCompilation(final WordTraversalDirection traversalDirection,
119
            final ReductionSettings reductionSettings, final DiacriticProcessingMode diacriticProcessingMode,
120
            final CaseProcessingMode caseProcessingMode) {
121 1 1. forCompilation : replaced return value with null for org/egothor/stemmer/TrieMetadata::forCompilation → KILLED
        return new TrieMetadata(FrequencyTrie.currentFormatVersion(), traversalDirection, reductionSettings,
122
                diacriticProcessingMode, caseProcessingMode);
123
    }
124
125
    /**
126
     * Creates metadata compatible with a legacy artifact version that did not store
127
     * the full configuration explicitly.
128
     *
129
     * @param formatVersion      legacy persisted binary format version
130
     * @param traversalDirection logical key traversal direction reconstructed from
131
     *                           the legacy stream
132
     * @return metadata reconstructed with conservative compatibility defaults
133
     */
134
    public static TrieMetadata legacy(final int formatVersion, final WordTraversalDirection traversalDirection) {
135 1 1. legacy : replaced return value with null for org/egothor/stemmer/TrieMetadata::legacy → SURVIVED
        return new TrieMetadata(formatVersion, traversalDirection,
136
                ReductionSettings.withDefaults(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
137
                DiacriticProcessingMode.AS_IS, CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
138
    }
139
140
    /**
141
     * Returns metadata encoded as a deterministic human-readable text block.
142
     *
143
     * <p>
144
     * The format intentionally uses plain {@code key=value} lines so users can
145
     * inspect metadata quickly from a decompressed trie payload without additional
146
     * dependencies.
147
     * </p>
148
     *
149
     * @return persisted metadata text block
150
     */
151
    @SuppressWarnings("PMD.ConsecutiveLiteralAppends")
152
    public String toTextBlock() {
153
        final StringBuilder textBlockBuilder = new StringBuilder(1024);
154
        textBlockBuilder.append(TEXT_BLOCK_HEADER).append('\n')
155
                //
156
                .append("formatVersion=").append(this.formatVersion).append('\n')
157
                //
158
                .append("traversalDirection=").append(this.traversalDirection.name()).append('\n')
159
                //
160 1 1. toTextBlock : negated conditional → KILLED
                .append("rightToLeft=").append(this.traversalDirection == WordTraversalDirection.FORWARD).append('\n')
161
                //
162
                .append("reductionMode=").append(this.reductionSettings.reductionMode().name()).append('\n')
163
                //
164
                .append("dominantWinnerMinPercent=").append(this.reductionSettings.dominantWinnerMinPercent())
165
                .append('\n')
166
                //
167
                .append("dominantWinnerOverSecondRatio=").append(this.reductionSettings.dominantWinnerOverSecondRatio())
168
                .append('\n')
169
                //
170
                .append("diacriticProcessingMode=").append(this.diacriticProcessingMode.name()).append('\n')
171
                //
172
                .append("caseProcessingMode=").append(this.caseProcessingMode.name()).append('\n');
173 1 1. toTextBlock : replaced return value with "" for org/egothor/stemmer/TrieMetadata::toTextBlock → KILLED
        return textBlockBuilder.toString();
174
    }
175
176
    /**
177
     * Parses metadata from a text block produced by {@link #toTextBlock()}.
178
     *
179
     * @param formatVersion persisted binary format version
180
     * @param textBlock     metadata text block
181
     * @return parsed metadata
182
     */
183
    public static TrieMetadata fromTextBlock(final int formatVersion, final String textBlock) {
184
        Objects.requireNonNull(textBlock, "textBlock");
185
186
        final String[] lines = textBlock.split("\\R");
187 2 1. fromTextBlock : negated conditional → KILLED
2. fromTextBlock : negated conditional → KILLED
        if (lines.length == 0 || !TEXT_BLOCK_HEADER.equals(lines[0])) {
188
            throw new IllegalArgumentException("Unsupported metadata block header.");
189
        }
190
191
        final Map<String, String> entries = new HashMap<>();
192 2 1. fromTextBlock : negated conditional → KILLED
2. fromTextBlock : changed conditional boundary → KILLED
        for (int index = 1; index < lines.length; index++) {
193
            final String line = lines[index];
194 1 1. fromTextBlock : negated conditional → KILLED
            if (line.isBlank()) {
195
                continue;
196
            }
197
            final int delimiterIndex = line.indexOf('=');
198 4 1. fromTextBlock : Replaced integer subtraction with addition → SURVIVED
2. fromTextBlock : changed conditional boundary → SURVIVED
3. fromTextBlock : negated conditional → KILLED
4. fromTextBlock : negated conditional → KILLED
            if (delimiterIndex <= 0 || delimiterIndex == line.length() - 1) {
199
                throw new IllegalArgumentException("Invalid metadata line: " + line);
200
            }
201 1 1. fromTextBlock : Replaced integer addition with subtraction → KILLED
            entries.put(line.substring(0, delimiterIndex), line.substring(delimiterIndex + 1));
202
        }
203
204
        final WordTraversalDirection traversalDirection = WordTraversalDirection
205
                .valueOf(requireEntry(entries, "traversalDirection"));
206
        final ReductionMode reductionMode = ReductionMode.valueOf(requireEntry(entries, "reductionMode"));
207
        final int dominantWinnerMinPercent = Integer.parseInt(requireEntry(entries, "dominantWinnerMinPercent"));
208
        final int dominantWinnerOverSecondRatio = Integer // NOPMD
209
                .parseInt(requireEntry(entries, "dominantWinnerOverSecondRatio"));
210
        final DiacriticProcessingMode diacriticProcessingMode = DiacriticProcessingMode
211
                .valueOf(requireEntry(entries, "diacriticProcessingMode"));
212
        final CaseProcessingMode caseProcessingMode = CaseProcessingMode
213
                .valueOf(requireEntry(entries, "caseProcessingMode"));
214
215 1 1. fromTextBlock : replaced return value with null for org/egothor/stemmer/TrieMetadata::fromTextBlock → KILLED
        return new TrieMetadata(formatVersion, traversalDirection,
216
                new ReductionSettings(reductionMode, dominantWinnerMinPercent, dominantWinnerOverSecondRatio),
217
                diacriticProcessingMode, caseProcessingMode);
218
    }
219
220
    /**
221
     * Returns a required metadata entry from a parsed text block.
222
     *
223
     * @param entries parsed metadata entries
224
     * @param key     required entry key
225
     * @return non-blank entry value
226
     * @throws IllegalArgumentException if the entry is absent or blank
227
     */
228
    private static String requireEntry(final Map<String, String> entries, final String key) {
229
        final String value = entries.get(key);
230 2 1. requireEntry : negated conditional → KILLED
2. requireEntry : negated conditional → KILLED
        if (value == null || value.isBlank()) {
231
            throw new IllegalArgumentException("Missing metadata entry: " + key);
232
        }
233 1 1. requireEntry : replaced return value with "" for org/egothor/stemmer/TrieMetadata::requireEntry → KILLED
        return value;
234
    }
235
}

Mutations

104

1.1
Location : current
Killed by : none
replaced return value with null for org/egothor/stemmer/TrieMetadata::current → NO_COVERAGE

121

1.1
Location : forCompilation
Killed by : org.egothor.stemmer.FrequencyTrieTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.FrequencyTrieTest]/[method:trieRejectsNullLookupKeys()]
replaced return value with null for org/egothor/stemmer/TrieMetadata::forCompilation → KILLED

135

1.1
Location : legacy
Killed by : none
replaced return value with null for org/egothor/stemmer/TrieMetadata::legacy → SURVIVED
Covering tests

160

1.1
Location : toTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
negated conditional → KILLED

173

1.1
Location : toTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
replaced return value with "" for org/egothor/stemmer/TrieMetadata::toTextBlock → KILLED

187

1.1
Location : fromTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
negated conditional → KILLED

2.2
Location : fromTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
negated conditional → KILLED

192

1.1
Location : fromTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
negated conditional → KILLED

2.2
Location : fromTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockParserRejectsMalformedInput()]
changed conditional boundary → KILLED

194

1.1
Location : fromTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
negated conditional → KILLED

198

1.1
Location : fromTextBlock
Killed by : none
Replaced integer subtraction with addition → SURVIVED
Covering tests

2.2
Location : fromTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
negated conditional → KILLED

3.3
Location : fromTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockParserRejectsMalformedInput()]
negated conditional → KILLED

4.4
Location : fromTextBlock
Killed by : none
changed conditional boundary → SURVIVED Covering tests

201

1.1
Location : fromTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
Replaced integer addition with subtraction → KILLED

215

1.1
Location : fromTextBlock
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
replaced return value with null for org/egothor/stemmer/TrieMetadata::fromTextBlock → KILLED

230

1.1
Location : requireEntry
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
negated conditional → KILLED

2.2
Location : requireEntry
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
negated conditional → KILLED

233

1.1
Location : requireEntry
Killed by : org.egothor.stemmer.TrieMetadataTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.TrieMetadataTest]/[method:textBlockRoundtripPreservesAllPersistedFields()]
replaced return value with "" for org/egothor/stemmer/TrieMetadata::requireEntry → KILLED

Active mutators

Tests examined


Report generated by PIT 1.22.1