Compile.java

1
/*******************************************************************************
2
 * Copyright (C) 2026, Leo Galambos
3
 * All rights reserved.
4
 * 
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions are met:
7
 * 
8
 * 1. Redistributions of source code must retain the above copyright notice,
9
 *    this list of conditions and the following disclaimer.
10
 * 
11
 * 2. Redistributions in binary form must reproduce the above copyright notice,
12
 *    this list of conditions and the following disclaimer in the documentation
13
 *    and/or other materials provided with the distribution.
14
 * 
15
 * 3. Neither the name of the copyright holder nor the names of its contributors
16
 *    may be used to endorse or promote products derived from this software
17
 *    without specific prior written permission.
18
 * 
19
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
 * POSSIBILITY OF SUCH DAMAGE.
30
 ******************************************************************************/
31
package org.egothor.stemmer;
32
33
import java.io.IOException;
34
import java.nio.file.Files;
35
import java.nio.file.Path;
36
import java.util.Locale;
37
import java.util.Objects;
38
import java.util.logging.Level;
39
import java.util.logging.Logger;
40
41
/**
42
 * Command-line compiler of stemmer dictionary files into compressed binary
43
 * {@link FrequencyTrie} artifacts.
44
 *
45
 * <p>
46
 * The CLI reads an input file in the same syntax as the project's stemmer
47
 * resource files, compiles it into a read-only {@link FrequencyTrie} of patch
48
 * commands, applies the selected subtree reduction strategy, and writes the
49
 * resulting trie in the project binary format under GZip compression.
50
 *
51
 * <p>
52
 * Remarks introduced by {@code #} or {@code //} are supported through
53
 * {@link StemmerDictionaryParser}.
54
 *
55
 * <p>
56
 * Supported arguments:
57
 * </p>
58
 *
59
 * <pre>
60
 * --input &lt;file&gt;
61
 * --output &lt;file&gt;
62
 * --reduction-mode &lt;mode&gt;
63
 * [--store-original]
64
 * [--dominant-winner-min-percent &lt;1..100&gt;]
65
 * [--dominant-winner-over-second-ratio &lt;1..n&gt;]
66
 * [--overwrite]
67
 * [--help]
68
 * </pre>
69
 */
70
public final class Compile {
71
72
    /**
73
     * Logger of this class.
74
     */
75
    private static final Logger LOGGER = Logger.getLogger(Compile.class.getName());
76
77
    /**
78
     * Exit status indicating success.
79
     */
80
    private static final int EXIT_SUCCESS = 0;
81
82
    /**
83
     * Exit status indicating invalid command-line usage.
84
     */
85
    private static final int EXIT_USAGE_ERROR = 2;
86
87
    /**
88
     * Exit status indicating processing failure.
89
     */
90
    private static final int EXIT_PROCESSING_ERROR = 1;
91
92
    /**
93
     * Utility class.
94
     */
95
    private Compile() {
96
        throw new AssertionError("No instances.");
97
    }
98
99
    /**
100
     * CLI entry point.
101
     *
102
     * @param arguments command-line arguments
103
     */
104
    public static void main(final String[] arguments) {
105
        final int exitCode = run(arguments);
106
        if (exitCode != EXIT_SUCCESS) {
107
            System.exit(exitCode);
108
        }
109
    }
110
111
    /**
112
     * Executes the CLI.
113
     *
114
     * @param arguments command-line arguments
115
     * @return process exit code
116
     */
117
    /* default */ static int run(final String... arguments) {
118
        try {
119
            final Arguments parsedArguments = Arguments.parse(arguments);
120
            if (parsedArguments.help()) {
121
                printUsage();
122
                return EXIT_SUCCESS;
123
            }
124
125
            compile(parsedArguments);
126
            return EXIT_SUCCESS;
127
        } catch (IllegalArgumentException exception) {
128
            System.err.println(exception.getMessage());
129
            System.err.println();
130
            printUsage();
131
            return EXIT_USAGE_ERROR;
132
        } catch (IOException exception) {
133
            if (LOGGER.isLoggable(Level.SEVERE)) {
134
                LOGGER.log(Level.SEVERE, "CLI compilation failed for input {0} and output {1}.",
135
                        new Object[] { safeInput(arguments), safeOutput(arguments) });
136
            }
137
            System.err.println("Compilation failed: " + exception.getMessage());
138
            return EXIT_PROCESSING_ERROR;
139
        }
140
    }
141
142
    /**
143
     * Compiles the input dictionary and writes the compressed binary trie.
144
     *
145
     * @param arguments parsed command-line arguments
146
     * @throws IOException if compilation or output writing fails
147
     */
148
    private static void compile(final Arguments arguments) throws IOException {
149
        final ReductionSettings reductionSettings = new ReductionSettings(arguments.reductionMode(),
150
                arguments.dominantWinnerMinPercent(), arguments.dominantWinnerOverSecondRatio());
151
152
        final FrequencyTrie<String> trie = StemmerPatchTrieLoader.load(arguments.inputFile(), arguments.storeOriginal(),
153
                reductionSettings);
154
155
        final Path outputFile = arguments.outputFile();
156
        final Path parent = outputFile.toAbsolutePath().getParent();
157
        if (parent != null) {
158
            Files.createDirectories(parent);
159
        }
160
161
        if (Files.exists(outputFile) && !arguments.overwrite()) {
162
            throw new IOException("Output file already exists: " + outputFile.toAbsolutePath());
163
        }
164
165
        StemmerPatchTrieBinaryIO.write(trie, outputFile);
166
167
        if (LOGGER.isLoggable(Level.INFO)) {
168
            LOGGER.log(Level.INFO,
169
                    "Compiled dictionary {0} to {1} using mode {2}, storeOriginal={3}, dominantWinnerMinPercent={4}, dominantWinnerOverSecondRatio={5}.",
170
                    new Object[] { arguments.inputFile().toAbsolutePath().toString(),
171
                            arguments.outputFile().toAbsolutePath().toString(), arguments.reductionMode().name(),
172
                            arguments.storeOriginal(), arguments.dominantWinnerMinPercent(),
173
                            arguments.dominantWinnerOverSecondRatio() });
174
        }
175
    }
176
177
    /**
178
     * Prints CLI usage help.
179
     */
180
    private static void printUsage() {
181
        System.err.println("Usage:");
182
        System.err.println("  java org.egothor.stemmer.Compile \\");
183
        System.err.println("      --input <file> \\");
184
        System.err.println("      --output <file> \\");
185
        System.err.println("      --reduction-mode <mode> \\");
186
        System.err.println("      [--store-original] \\");
187
        System.err.println("      [--dominant-winner-min-percent <1..100>] \\");
188
        System.err.println("      [--dominant-winner-over-second-ratio <1..n>] \\");
189
        System.err.println("      [--overwrite]");
190
        System.err.println();
191
        System.err.println("Supported reduction modes:");
192
        for (ReductionMode mode : ReductionMode.values()) {
193
            System.err.println("  " + mode.name());
194
        }
195
    }
196
197
    /**
198
     * Returns a best-effort input value for diagnostic logging.
199
     *
200
     * @param arguments raw command-line arguments
201
     * @return input value if present, otherwise {@code "<unknown>"}
202
     */
203
    private static String safeInput(final String... arguments) {
204
        return safeOptionValue(arguments, "--input");
205
    }
206
207
    /**
208
     * Returns a best-effort output value for diagnostic logging.
209
     *
210
     * @param arguments raw command-line arguments
211
     * @return output value if present, otherwise {@code "<unknown>"}
212
     */
213
    private static String safeOutput(final String... arguments) {
214
        return safeOptionValue(arguments, "--output");
215
    }
216
217
    /**
218
     * Returns a best-effort option value from raw arguments.
219
     *
220
     * @param arguments raw command-line arguments
221
     * @param option    option name
222
     * @return option value if present, otherwise {@code "<unknown>"}
223
     */
224
    private static String safeOptionValue(final String[] arguments, final String option) {
225
        if (arguments == null) {
226
            return "<unknown>";
227
        }
228
        for (int index = 0; index < arguments.length - 1; index++) {
229
            if (option.equals(arguments[index])) {
230
                return arguments[index + 1];
231
            }
232
        }
233
        return "<unknown>";
234
    }
235
236
    /**
237
     * Immutable parsed CLI arguments.
238
     *
239
     * @param inputFile                     input dictionary file
240
     * @param outputFile                    output compressed trie file
241
     * @param reductionMode                 subtree reduction mode
242
     * @param storeOriginal                 whether original stems are stored
243
     * @param dominantWinnerMinPercent      dominant winner minimum percent
244
     * @param dominantWinnerOverSecondRatio dominant winner over second ratio
245
     * @param overwrite                     whether an existing output may be
246
     *                                      replaced
247
     * @param help                          whether usage help was requested
248
     */
249
    @SuppressWarnings("PMD.LongVariable")
250
    private record Arguments(Path inputFile, Path outputFile, ReductionMode reductionMode, boolean storeOriginal,
251
            int dominantWinnerMinPercent, int dominantWinnerOverSecondRatio, boolean overwrite, boolean help) {
252
253
        /**
254
         * Parses raw command-line arguments.
255
         *
256
         * @param arguments raw command-line arguments
257
         * @return parsed arguments
258
         */
259
        @SuppressWarnings({ "PMD.AvoidReassigningLoopVariables", "PMD.CyclomaticComplexity" })
260
        private static Arguments parse(final String... arguments) {
261
            Objects.requireNonNull(arguments, "arguments");
262
263
            Path inputFile = null;
264
            Path outputFile = null;
265
            ReductionMode reductionMode = null;
266
            boolean storeOriginal = false;
267
            boolean overwrite = false;
268
            boolean help = false;
269
            int dominantWinnerMinPercent = ReductionSettings.DEFAULT_DOMINANT_WINNER_MIN_PERCENT;
270
            int dominantWinnerOverSecondRatio = ReductionSettings.DEFAULT_DOMINANT_WINNER_OVER_SECOND_RATIO;
271
272 2 1. parse : negated conditional → KILLED
2. parse : changed conditional boundary → KILLED
            for (int index = 0; index < arguments.length; index++) {
273
                final String argument = arguments[index];
274
275
                switch (argument) {
276
                    case "--help":
277
                    case "-h":
278
                        help = true;
279
                        break;
280
281
                    case "--store-original":
282
                        storeOriginal = true;
283
                        break;
284
285
                    case "--overwrite":
286
                        overwrite = true;
287
                        break;
288
289
                    case "--input":
290 1 1. parse : Changed increment from 1 to -1 → KILLED
                        inputFile = Path.of(requireValue(arguments, ++index, "--input"));
291
                        break;
292
293
                    case "--output":
294 1 1. parse : Changed increment from 1 to -1 → TIMED_OUT
                        outputFile = Path.of(requireValue(arguments, ++index, "--output"));
295
                        break;
296
297
                    case "--reduction-mode":
298 1 1. parse : Changed increment from 1 to -1 → KILLED
                        reductionMode = ReductionMode
299
                                .valueOf(requireValue(arguments, ++index, "--reduction-mode").toUpperCase(Locale.ROOT));
300
                        break;
301
302
                    case "--dominant-winner-min-percent":
303 1 1. parse : Changed increment from 1 to -1 → KILLED
                        dominantWinnerMinPercent = parseInteger(
304
                                requireValue(arguments, ++index, "--dominant-winner-min-percent"),
305
                                "--dominant-winner-min-percent");
306
                        break;
307
308
                    case "--dominant-winner-over-second-ratio":
309 1 1. parse : Changed increment from 1 to -1 → KILLED
                        dominantWinnerOverSecondRatio = parseInteger(
310
                                requireValue(arguments, ++index, "--dominant-winner-over-second-ratio"),
311
                                "--dominant-winner-over-second-ratio");
312
                        break;
313
314
                    default:
315
                        throw new IllegalArgumentException("Unknown argument: " + argument);
316
                }
317
            }
318
319 1 1. parse : negated conditional → KILLED
            if (help) {
320 1 1. parse : replaced return value with null for org/egothor/stemmer/Compile$Arguments::parse → KILLED
                return new Arguments(inputFile, outputFile, reductionMode, storeOriginal, dominantWinnerMinPercent,
321
                        dominantWinnerOverSecondRatio, overwrite, true);
322
            }
323
324 1 1. parse : negated conditional → KILLED
            if (inputFile == null) {
325
                throw new IllegalArgumentException("Missing required argument --input.");
326
            }
327 1 1. parse : negated conditional → KILLED
            if (outputFile == null) {
328
                throw new IllegalArgumentException("Missing required argument --output.");
329
            }
330 1 1. parse : negated conditional → KILLED
            if (reductionMode == null) {
331
                throw new IllegalArgumentException("Missing required argument --reduction-mode.");
332
            }
333
334 1 1. parse : replaced return value with null for org/egothor/stemmer/Compile$Arguments::parse → KILLED
            return new Arguments(inputFile, outputFile, reductionMode, storeOriginal, dominantWinnerMinPercent,
335
                    dominantWinnerOverSecondRatio, overwrite, false);
336
        }
337
338
        /**
339
         * Returns the required value of an option.
340
         *
341
         * @param arguments raw arguments
342
         * @param index     value index
343
         * @param option    option name
344
         * @return option value
345
         */
346
        private static String requireValue(final String[] arguments, final int index, final String option) {
347 2 1. requireValue : changed conditional boundary → KILLED
2. requireValue : negated conditional → KILLED
            if (index >= arguments.length) {
348
                throw new IllegalArgumentException("Missing value for " + option + ".");
349
            }
350 1 1. requireValue : replaced return value with "" for org/egothor/stemmer/Compile$Arguments::requireValue → KILLED
            return arguments[index];
351
        }
352
353
        /**
354
         * Parses an integer option value.
355
         *
356
         * @param value      raw value
357
         * @param optionName option name
358
         * @return parsed integer
359
         */
360
        private static int parseInteger(final String value, final String optionName) {
361
            try {
362 1 1. parseInteger : replaced int return with 0 for org/egothor/stemmer/Compile$Arguments::parseInteger → NO_COVERAGE
                return Integer.parseInt(value);
363
            } catch (NumberFormatException exception) {
364
                throw new IllegalArgumentException("Invalid integer for " + optionName + ": " + value, exception);
365
            }
366
        }
367
    }
368
}

Mutations

272

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorWhenOptionValueIsMissing()]
negated conditional → KILLED

2.2
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[method:shouldReturnSuccessAndPrintUsageWhenHelpIsRequested()]
changed conditional boundary → KILLED

290

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorWhenOptionValueIsMissing()]
Changed increment from 1 to -1 → KILLED

294

1.1
Location : parse
Killed by : none
Changed increment from 1 to -1 → TIMED_OUT

298

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorForInvalidDominantWinnerMinPercent()]
Changed increment from 1 to -1 → KILLED

303

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorForInvalidDominantWinnerMinPercent()]
Changed increment from 1 to -1 → KILLED

309

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorForInvalidDominantWinnerOverSecondRatio()]
Changed increment from 1 to -1 → KILLED

319

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[method:shouldReturnSuccessAndPrintUsageWhenHelpIsRequested()]
negated conditional → KILLED

320

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[method:shouldReturnSuccessAndPrintUsageWhenHelpIsRequested()]
replaced return value with null for org/egothor/stemmer/Compile$Arguments::parse → KILLED

324

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorWhenInputIsMissing()]
negated conditional → KILLED

327

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorWhenOutputIsMissing()]
negated conditional → KILLED

330

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorWhenReductionModeIsMissing()]
negated conditional → KILLED

334

1.1
Location : parse
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[method:shouldFailWithProcessingErrorWhenInputFileDoesNotExist()]
replaced return value with null for org/egothor/stemmer/Compile$Arguments::parse → KILLED

347

1.1
Location : requireValue
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorWhenOptionValueIsMissing()]
changed conditional boundary → KILLED

2.2
Location : requireValue
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorWhenOptionValueIsMissing()]
negated conditional → KILLED

350

1.1
Location : requireValue
Killed by : org.egothor.stemmer.CompileTest.[engine:junit-jupiter]/[class:org.egothor.stemmer.CompileTest]/[nested-class:ArgumentValidationTest]/[method:shouldFailWithUsageErrorForInvalidDominantWinnerOverSecondRatio()]
replaced return value with "" for org/egothor/stemmer/Compile$Arguments::requireValue → KILLED

362

1.1
Location : parseInteger
Killed by : none
replaced int return with 0 for org/egothor/stemmer/Compile$Arguments::parseInteger → NO_COVERAGE

Active mutators

Tests examined


Report generated by PIT 1.22.1