StemmerPatchTrieLoader.java

/*******************************************************************************
 * Copyright (C) 2026, Leo Galambos
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************/
package org.egothor.stemmer;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;

/**
 * Loader of patch-command tries from bundled stemmer dictionaries.
 *
 * <p>
 * Each dictionary is line-oriented and uses a tab-separated values layout. The
 * first column on a line is interpreted as the stem, and all following
 * tab-separated columns are treated as known variants of that stem.
 *
 * <p>
 * For each line, the loader inserts:
 * <ul>
 * <li>the stem itself mapped to the canonical no-op patch command
 * {@link PatchCommandEncoder#NOOP_PATCH}, when requested by the caller</li>
 * <li>every distinct variant mapped to the patch command transforming that
 * variant to the stem using the traversal direction implied by the selected
 * language or loader overload</li>
 * </ul>
 *
 * <p>
 * Parsing is delegated to {@link StemmerDictionaryParser}, which also supports
 * line remarks introduced by {@code #} or {@code //} and ignores dictionary
 * items containing Unicode whitespace characters while reporting them through
 * aggregated warning log records.
 */
public final class StemmerPatchTrieLoader {

    /* default */ static final String FILENAME_REQUIRED = "fileName required";

    /**
     * Logger of this class.
     */
    private static final Logger LOGGER = Logger.getLogger(StemmerPatchTrieLoader.class.getName());

    /**
     * Canonical no-op patch command used when the source and target are equal.
     */
    private static final String NOOP_PATCH_COMMAND = PatchCommandEncoder.NOOP_PATCH;

    /**
     * Utility class.
     */
    private StemmerPatchTrieLoader() {
        throw new AssertionError("No instances.");
    }

    /**
     * Supported bundled stemmer dictionaries.
     *
     * <p>
     * Each language constant defines:
     * </p>
     * <ul>
     * <li>the resource directory name used under the bundled resources tree</li>
     * <li>whether the language is written right-to-left</li>
     * </ul>
     *
     * <p>
     * The right-to-left flag is intended for consumers that need to decide whether
     * affix-oriented processing should conceptually traverse words from the visual
     * end or from the logical beginning of the stored form.
     * </p>
     */
    public enum Language {

        /**
         * Czech.
         */
        CS_CZ("cs_cz", false),

        /**
         * Danish.
         */
        DA_DK("da_dk", false),

        /**
         * German.
         */
        DE_DE("de_de", false),

        /**
         * Spanish.
         */
        ES_ES("es_es", false),

        /**
         * Persian.
         */
        FA_IR("fa_ir", true),

        /**
         * Finnish.
         */
        FI_FI("fi_fi", false),

        /**
         * French.
         */
        FR_FR("fr_fr", false),

        /**
         * Hebrew.
         */
        HE_IL("he_il", true),

        /**
         * Hungarian.
         */
        HU_HU("hu_hu", false),

        /**
         * Italian.
         */
        IT_IT("it_it", false),

        /**
         * Norwegian Bokmål.
         */
        NB_NO("nb_no", false),

        /**
         * Dutch.
         */
        NL_NL("nl_nl", false),

        /**
         * Norwegian Nynorsk.
         */
        NN_NO("nn_no", false),

        /**
         * Polish.
         */
        PL_PL("pl_pl", false),

        /**
         * Portuguese.
         */
        PT_PT("pt_pt", false),

        /**
         * Russian.
         */
        RU_RU("ru_ru", false),

        /**
         * Swedish.
         */
        SV_SE("sv_se", false),

        /**
         * Ukrainian.
         */
        UK_UA("uk_ua", false),

        /**
         * English.
         */
        US_UK("us_uk", false),

        /**
         * Yiddish.
         */
        YI("yi", true);

        /**
         * Resource directory name.
         */
        private final String resourceDirectory;

        /**
         * Whether the language is written right-to-left.
         */
        private final boolean rightToLeft;

        /**
         * Creates a language constant.
         *
         * @param resourceDirectory resource directory name
         * @param rightToLeft       whether the language is written right-to-left
         */
        Language(final String resourceDirectory, final boolean rightToLeft) {
            this.resourceDirectory = resourceDirectory;
            this.rightToLeft = rightToLeft;
        }

        /**
         * Returns the classpath resource path of the bundled stemmer dictionary.
         *
         * @return classpath resource path
         */
        public String resourcePath() {
            return this.resourceDirectory + "/stemmer.gz";
        }

        /**
         * Returns the resource directory name.
         *
         * @return resource directory name
         */
        public String resourceDirectory() {
            return this.resourceDirectory;
        }

        /**
         * Returns whether the language is written right-to-left.
         *
         * <p>
         * This flag can be used by trie-building and lookup logic to decide whether
         * suffix-oriented traversal should operate on the stored word form as-is rather
         * than by reversing the logical character sequence.
         * </p>
         *
         * @return {@code true} when the language is written right-to-left, otherwise
         *         {@code false}
         */
        public boolean isRightToLeft() {
            return this.rightToLeft;
        }
    }

    /**
     * Loads a bundled dictionary using explicit reduction settings.
     *
     * <p>
     * This overload applies the following implicit compilation defaults in addition
     * to the supplied {@code reductionSettings}:
     * </p>
     * <ul>
     * <li>traversal direction is derived from {@link Language#isRightToLeft()}
     * ({@link WordTraversalDirection#FORWARD} for right-to-left languages,
     * {@link WordTraversalDirection#BACKWARD} otherwise)</li>
     * <li>case processing mode is
     * {@link CaseProcessingMode#LOWERCASE_WITH_LOCALE_ROOT}</li>
     * <li>diacritic processing mode is {@link DiacriticProcessingMode#AS_IS}</li>
     * </ul>
     *
     * <p>
     * The resolved settings are persisted into {@link TrieMetadata} of the
     * resulting trie.
     * </p>
     *
     * @param language          bundled language dictionary
     * @param storeOriginal     whether the stem itself should be inserted using the
     *                          canonical no-op patch command
     * @param reductionSettings reduction settings
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the dictionary cannot be found or read
     */
    public static FrequencyTrie<String> load(final Language language, final boolean storeOriginal,
            final ReductionSettings reductionSettings) throws IOException {
        Objects.requireNonNull(language, "language");
        Objects.requireNonNull(reductionSettings, "reductionSettings");
        final TrieMetadata metadata = metadataForCompilation(traversalDirectionOf(language), reductionSettings,
                CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT, DiacriticProcessingMode.AS_IS);
        return load(language, storeOriginal, metadata);
    }

    /**
     * Loads a bundled dictionary using explicit trie compilation metadata.
     *
     * <p>
     * All semantic compilation settings (reduction mode and thresholds, traversal
     * direction, case processing mode, and diacritic processing mode) are taken
     * from the supplied metadata object and are persisted unchanged in the
     * resulting trie.
     * </p>
     *
     * @param language      bundled language dictionary
     * @param storeOriginal whether the stem itself should be inserted using the
     *                      canonical no-op patch command
     * @param metadata      trie metadata describing the compilation configuration
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the dictionary cannot be found or read
     */
    public static FrequencyTrie<String> load(final Language language, final boolean storeOriginal,
            final TrieMetadata metadata) throws IOException {
        Objects.requireNonNull(language, "language");
        Objects.requireNonNull(metadata, "metadata");

        final String resourcePath = language.resourcePath();

        try (InputStream inputStream = openBundledResource(resourcePath);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            return load(reader, resourcePath, storeOriginal, metadata);
        }
    }

    /**
     * Loads a bundled dictionary using default settings for the supplied reduction
     * mode.
     *
     * <p>
     * This overload is equivalent to calling
     * {@link #load(Language, boolean, ReductionSettings)} with
     * {@link ReductionSettings#withDefaults(ReductionMode)} and therefore uses the
     * same implicit defaults for traversal direction, case processing mode, and
     * diacritic processing mode.
     * </p>
     *
     * @param language      bundled language dictionary
     * @param storeOriginal whether the stem itself should be inserted using the
     *                      canonical no-op patch command
     * @param reductionMode reduction mode
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the dictionary cannot be found or read
     */
    public static FrequencyTrie<String> load(final Language language, final boolean storeOriginal,
            final ReductionMode reductionMode) throws IOException {
        Objects.requireNonNull(reductionMode, "reductionMode");
        return load(language, storeOriginal, ReductionSettings.withDefaults(reductionMode));
    }

    /**
     * Loads a dictionary from a filesystem path using explicit reduction settings.
     *
     * <p>
     * This overload applies historical Egothor-compatible implicit defaults:
     * {@link WordTraversalDirection#BACKWARD},
     * {@link CaseProcessingMode#LOWERCASE_WITH_LOCALE_ROOT}, and
     * {@link DiacriticProcessingMode#AS_IS}. These settings are persisted in
     * resulting trie metadata.
     * </p>
     *
     * @param path              path to the dictionary file
     * @param storeOriginal     whether the stem itself should be inserted using the
     *                          canonical no-op patch command
     * @param reductionSettings reduction settings
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
            final ReductionSettings reductionSettings) throws IOException {
        return load(path, storeOriginal, reductionSettings, WordTraversalDirection.BACKWARD,
                CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT, DiacriticProcessingMode.AS_IS);
    }

    /**
     * Loads a dictionary from a filesystem path using explicit reduction settings
     * and explicit traversal direction.
     *
     * <p>
     * Implicit defaults still apply for unspecified dimensions:
     * {@link CaseProcessingMode#LOWERCASE_WITH_LOCALE_ROOT} and
     * {@link DiacriticProcessingMode#AS_IS}.
     * </p>
     *
     * @param path               path to the dictionary file
     * @param storeOriginal      whether the stem itself should be inserted using
     *                           the canonical no-op patch command
     * @param reductionSettings  reduction settings
     * @param traversalDirection traversal direction used for both trie keys and
     *                           patch commands
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection)
            throws IOException {
        return load(path, storeOriginal, reductionSettings, traversalDirection,
                CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT, DiacriticProcessingMode.AS_IS);
    }

    /**
     * Loads a dictionary from a filesystem path using explicit reduction settings,
     * explicit traversal direction, and explicit case processing mode.
     *
     * <p>
     * This overload still defaults diacritic processing to
     * {@link DiacriticProcessingMode#AS_IS}.
     * </p>
     *
     * @param path               path to the dictionary file
     * @param storeOriginal      whether the stem itself should be inserted using
     *                           the canonical no-op patch command
     * @param reductionSettings  reduction settings
     * @param traversalDirection traversal direction used for both trie keys and
     *                           patch commands
     * @param caseProcessingMode case processing mode used during dictionary parsing
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection,
            final CaseProcessingMode caseProcessingMode) throws IOException {
        return load(path, storeOriginal, reductionSettings, traversalDirection, caseProcessingMode,
                DiacriticProcessingMode.AS_IS);
    }

    /**
     * Loads a dictionary from a filesystem path using explicit reduction settings,
     * traversal direction, case processing mode, and diacritic processing mode.
     *
     * @param path                    path to the dictionary file
     * @param storeOriginal           whether the stem itself should be inserted
     *                                using the canonical no-op patch command
     * @param reductionSettings       reduction settings
     * @param traversalDirection      traversal direction used for both trie keys
     *                                and patch commands
     * @param caseProcessingMode      case processing mode used during dictionary
     *                                parsing
     * @param diacriticProcessingMode diacritic processing mode used during
     *                                dictionary parsing
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection,
            final CaseProcessingMode caseProcessingMode, final DiacriticProcessingMode diacriticProcessingMode)
            throws IOException {
        Objects.requireNonNull(path, "path");
        final TrieMetadata metadata = metadataForCompilation(traversalDirection, reductionSettings, caseProcessingMode,
                diacriticProcessingMode);
        return load(path, storeOriginal, metadata);
    }

    /**
     * Loads a dictionary from a filesystem path using explicit trie compilation
     * metadata.
     *
     * <p>
     * The supplied metadata is the authoritative source of trie compilation
     * semantics. Callers should ensure metadata matches how they expect to query
     * the trie (for example, with or without lowercasing or diacritic stripping).
     * </p>
     *
     * @param path          path to the dictionary file
     * @param storeOriginal whether the stem itself should be inserted using the
     *                      canonical no-op patch command
     * @param metadata      trie metadata describing the compilation configuration
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal, final TrieMetadata metadata)
            throws IOException {
        Objects.requireNonNull(path, "path");
        Objects.requireNonNull(metadata, "metadata");

        try (InputStream inputStream = openDictionaryInputStream(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            return load(reader, path.toAbsolutePath().toString(), storeOriginal, metadata);
        }
    }

    /**
     * Loads a dictionary from a filesystem path using default settings for the
     * supplied reduction mode.
     *
     * <p>
     * This overload is equivalent to calling
     * {@link #load(Path, boolean, ReductionSettings)} with
     * {@link ReductionSettings#withDefaults(ReductionMode)} and therefore uses
     * implicit defaults ({@link WordTraversalDirection#BACKWARD},
     * {@link CaseProcessingMode#LOWERCASE_WITH_LOCALE_ROOT},
     * {@link DiacriticProcessingMode#AS_IS}).
     * </p>
     *
     * @param path          path to the dictionary file
     * @param storeOriginal whether the stem itself should be inserted using the
     *                      canonical no-op patch command
     * @param reductionMode reduction mode
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
            final ReductionMode reductionMode) throws IOException {
        Objects.requireNonNull(reductionMode, "reductionMode");
        return load(path, storeOriginal, ReductionSettings.withDefaults(reductionMode));
    }

    /**
     * Loads a dictionary from a filesystem path string using explicit reduction
     * settings.
     *
     * <p>
     * Same semantics as {@link #load(Path, boolean, ReductionSettings)} including
     * implicit defaults ({@link WordTraversalDirection#BACKWARD},
     * {@link CaseProcessingMode#LOWERCASE_WITH_LOCALE_ROOT},
     * {@link DiacriticProcessingMode#AS_IS}).
     * </p>
     *
     * @param fileName          file name or path string
     * @param storeOriginal     whether the stem itself should be inserted using the
     *                          canonical no-op patch command
     * @param reductionSettings reduction settings
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final String fileName, final boolean storeOriginal,
            final ReductionSettings reductionSettings) throws IOException {
        Objects.requireNonNull(fileName, FILENAME_REQUIRED);
        return load(Path.of(fileName), storeOriginal, reductionSettings);
    }

    /**
     * Loads a dictionary from a filesystem path string using explicit reduction
     * settings and explicit traversal direction.
     *
     * <p>
     * Same semantics as
     * {@link #load(Path, boolean, ReductionSettings, WordTraversalDirection)}.
     * Implicit defaults remain
     * {@link CaseProcessingMode#LOWERCASE_WITH_LOCALE_ROOT} and
     * {@link DiacriticProcessingMode#AS_IS}.
     * </p>
     *
     * @param fileName           file name or path string
     * @param storeOriginal      whether the stem itself should be inserted using
     *                           the canonical no-op patch command
     * @param reductionSettings  reduction settings
     * @param traversalDirection traversal direction used for both trie keys and
     *                           patch commands
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final String fileName, final boolean storeOriginal,
            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection)
            throws IOException {
        Objects.requireNonNull(fileName, FILENAME_REQUIRED);
        return load(Path.of(fileName), storeOriginal, reductionSettings, traversalDirection,
                CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
    }

    /**
     * Loads a dictionary from a filesystem path string using explicit reduction
     * settings, explicit traversal direction, and explicit case processing mode.
     *
     * <p>
     * Same semantics as
     * {@link #load(Path, boolean, ReductionSettings, WordTraversalDirection, CaseProcessingMode)}.
     * Implicit default remains {@link DiacriticProcessingMode#AS_IS}.
     * </p>
     *
     * @param fileName           file name or path string
     * @param storeOriginal      whether the stem itself should be inserted using
     *                           the canonical no-op patch command
     * @param reductionSettings  reduction settings
     * @param traversalDirection traversal direction used for both trie keys and
     *                           patch commands
     * @param caseProcessingMode case processing mode used during dictionary parsing
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final String fileName, final boolean storeOriginal,
            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection,
            final CaseProcessingMode caseProcessingMode) throws IOException {
        Objects.requireNonNull(fileName, FILENAME_REQUIRED);
        return load(Path.of(fileName), storeOriginal, reductionSettings, traversalDirection, caseProcessingMode,
                DiacriticProcessingMode.AS_IS);
    }

    /**
     * Loads a dictionary from a filesystem path string using explicit reduction
     * settings, explicit traversal direction, explicit case processing mode, and
     * explicit diacritic processing mode.
     *
     * @param fileName                file name or path string
     * @param storeOriginal           whether the stem itself should be inserted
     *                                using the canonical no-op patch command
     * @param reductionSettings       reduction settings
     * @param traversalDirection      traversal direction used for both trie keys
     *                                and patch commands
     * @param caseProcessingMode      case processing mode used during dictionary
     *                                parsing
     * @param diacriticProcessingMode diacritic processing mode used during
     *                                dictionary parsing
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final String fileName, final boolean storeOriginal,
            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection,
            final CaseProcessingMode caseProcessingMode, final DiacriticProcessingMode diacriticProcessingMode)
            throws IOException {
        Objects.requireNonNull(fileName, FILENAME_REQUIRED);
        return load(Path.of(fileName), storeOriginal, reductionSettings, traversalDirection, caseProcessingMode,
                diacriticProcessingMode);
    }

    /**
     * Loads a dictionary from a filesystem path string using explicit trie
     * compilation metadata.
     *
     * <p>
     * Same semantics as {@link #load(Path, boolean, TrieMetadata)}.
     * </p>
     *
     * @param fileName      file name or path string
     * @param storeOriginal whether the stem itself should be inserted using the
     *                      canonical no-op patch command
     * @param metadata      trie metadata describing the compilation configuration
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final String fileName, final boolean storeOriginal,
            final TrieMetadata metadata) throws IOException {
        Objects.requireNonNull(fileName, FILENAME_REQUIRED);
        return load(Path.of(fileName), storeOriginal, metadata);
    }

    /**
     * Loads a dictionary from a filesystem path string using default settings for
     * the supplied reduction mode.
     *
     * <p>
     * Equivalent to {@link #load(Path, boolean, ReductionMode)} and therefore uses
     * implicit defaults ({@link WordTraversalDirection#BACKWARD},
     * {@link CaseProcessingMode#LOWERCASE_WITH_LOCALE_ROOT},
     * {@link DiacriticProcessingMode#AS_IS}).
     * </p>
     *
     * @param fileName      file name or path string
     * @param storeOriginal whether the stem itself should be inserted using the
     *                      canonical no-op patch command
     * @param reductionMode reduction mode
     * @return compiled patch-command trie
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if the file cannot be opened or read
     */
    public static FrequencyTrie<String> load(final String fileName, final boolean storeOriginal,
            final ReductionMode reductionMode) throws IOException {
        Objects.requireNonNull(fileName, FILENAME_REQUIRED);
        return load(Path.of(fileName), storeOriginal, reductionMode);
    }

    /**
     * Parses one dictionary and builds the compiled trie.
     *
     * @param reader            dictionary reader
     * @param sourceDescription logical source description used for diagnostics
     * @param storeOriginal     whether the stem itself should be inserted using the
     *                          canonical no-op patch command
     * @param metadata          trie metadata used to drive all compilation settings
     * @return compiled patch-command trie
     * @throws IOException if parsing fails
     */
    private static FrequencyTrie<String> load(final BufferedReader reader, final String sourceDescription,
            final boolean storeOriginal, final TrieMetadata metadata) throws IOException {
        final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(String[]::new,
                metadata.reductionSettings(), metadata.traversalDirection(), metadata.caseProcessingMode(),
                metadata.diacriticProcessingMode());
        final PatchCommandEncoder patchCommandEncoder = PatchCommandEncoder.builder()
                .traversalDirection(metadata.traversalDirection()).build();
        final int[] insertedMappings = new int[1];

        final StemmerDictionaryParser.ParseStatistics statistics = StemmerDictionaryParser.parse(reader,
                sourceDescription, metadata.caseProcessingMode(), (stem, variants, lineNumber) -> {
                    if (storeOriginal) {
                        builder.put(stem, NOOP_PATCH_COMMAND);
                        insertedMappings[0]++;
                    }

                    for (String variant : variants) {
                        if (!variant.equals(stem)) {
                            builder.put(variant, patchCommandEncoder.encode(variant, stem));
                            insertedMappings[0]++;
                        }
                    }
                });

        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE,
                    "Loaded stemmer dictionary from {0}; insertedMappings={1}, lines={2}, entries={3}, ignoredLines={4}, metadata={5}.",
                    new Object[] { sourceDescription, insertedMappings[0], statistics.lineCount(),
                            statistics.entryCount(), statistics.ignoredLineCount(), metadata.toTextBlock() });
        }

        return builder.build();
    }

    private static TrieMetadata metadataForCompilation(final WordTraversalDirection traversalDirection,
            final ReductionSettings reductionSettings, final CaseProcessingMode caseProcessingMode,
            final DiacriticProcessingMode diacriticProcessingMode) {
        Objects.requireNonNull(traversalDirection, "traversalDirection");
        Objects.requireNonNull(reductionSettings, "reductionSettings");
        Objects.requireNonNull(caseProcessingMode, "caseProcessingMode");
        Objects.requireNonNull(diacriticProcessingMode, "diacriticProcessingMode");
        return TrieMetadata.forCompilation(traversalDirection, reductionSettings, diacriticProcessingMode,
                caseProcessingMode);
    }

    /**
     * Resolves the traversal direction implied by a bundled language definition.
     *
     * @param language bundled language
     * @return traversal direction to use for that language
     */
    private static WordTraversalDirection traversalDirectionOf(final Language language) {
        return language.isRightToLeft() ? WordTraversalDirection.FORWARD : WordTraversalDirection.BACKWARD;
    }

    /**
     * Loads a GZip-compressed binary patch-command trie from a filesystem path.
     *
     * @param path path to the compressed binary trie file
     * @return compiled patch-command trie
     * @throws NullPointerException if {@code path} is {@code null}
     * @throws IOException          if the file cannot be opened, decompressed, or
     *                              read
     */
    public static FrequencyTrie<String> loadBinary(final Path path) throws IOException {
        Objects.requireNonNull(path, "path");
        return StemmerPatchTrieBinaryIO.read(path);
    }

    /**
     * Loads a GZip-compressed binary patch-command trie from a filesystem path
     * string.
     *
     * @param fileName file name or path string
     * @return compiled patch-command trie
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @throws IOException          if the file cannot be opened, decompressed, or
     *                              read
     */
    public static FrequencyTrie<String> loadBinary(final String fileName) throws IOException {
        Objects.requireNonNull(fileName, FILENAME_REQUIRED);
        return StemmerPatchTrieBinaryIO.read(fileName);
    }

    /**
     * Loads a GZip-compressed binary patch-command trie from an input stream.
     *
     * @param inputStream source input stream
     * @return compiled patch-command trie
     * @throws NullPointerException if {@code inputStream} is {@code null}
     * @throws IOException          if the stream cannot be decompressed or read
     */
    public static FrequencyTrie<String> loadBinary(final InputStream inputStream) throws IOException {
        Objects.requireNonNull(inputStream, "inputStream");
        return StemmerPatchTrieBinaryIO.read(inputStream);
    }

    /**
     * Loads only persisted metadata from a GZip-compressed binary patch-command
     * trie file.
     *
     * @param path path to the compressed binary trie file
     * @return persisted trie metadata
     * @throws NullPointerException if {@code path} is {@code null}
     * @throws IOException          if the file cannot be opened, decompressed, or
     *                              read
     */
    public static TrieMetadata loadBinaryMetadata(final Path path) throws IOException {
        Objects.requireNonNull(path, "path");
        return StemmerPatchTrieBinaryIO.readMetadata(path);
    }

    /**
     * Loads only persisted metadata from a GZip-compressed binary patch-command
     * trie file.
     *
     * @param fileName file name or path string
     * @return persisted trie metadata
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @throws IOException          if the file cannot be opened, decompressed, or
     *                              read
     */
    public static TrieMetadata loadBinaryMetadata(final String fileName) throws IOException {
        Objects.requireNonNull(fileName, FILENAME_REQUIRED);
        return StemmerPatchTrieBinaryIO.readMetadata(fileName);
    }

    /**
     * Loads only persisted metadata from a GZip-compressed binary patch-command
     * trie stream.
     *
     * @param inputStream source input stream
     * @return persisted trie metadata
     * @throws NullPointerException if {@code inputStream} is {@code null}
     * @throws IOException          if the stream cannot be decompressed or read
     */
    public static TrieMetadata loadBinaryMetadata(final InputStream inputStream) throws IOException {
        Objects.requireNonNull(inputStream, "inputStream");
        return StemmerPatchTrieBinaryIO.readMetadata(inputStream);
    }

    /**
     * Saves a compiled patch-command trie as a GZip-compressed binary file.
     *
     * @param trie compiled trie
     * @param path target file
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if writing fails
     */
    public static void saveBinary(final FrequencyTrie<String> trie, final Path path) throws IOException {
        Objects.requireNonNull(trie, "trie");
        Objects.requireNonNull(path, "path");
        StemmerPatchTrieBinaryIO.write(trie, path);
    }

    /**
     * Saves a compiled patch-command trie as a GZip-compressed binary file.
     *
     * @param trie     compiled trie
     * @param fileName target file name or path string
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if writing fails
     */
    public static void saveBinary(final FrequencyTrie<String> trie, final String fileName) throws IOException {
        Objects.requireNonNull(trie, "trie");
        Objects.requireNonNull(fileName, FILENAME_REQUIRED);
        StemmerPatchTrieBinaryIO.write(trie, fileName);
    }

    /**
     * Opens one filesystem dictionary input stream.
     *
     * <p>
     * Plain-text dictionaries are returned as-is. GZip-compressed dictionaries are
     * detected from the stream header rather than from the file extension so that
     * callers may provide arbitrary temporary file names without changing the
     * loading contract.
     * </p>
     *
     * @param path dictionary file path
     * @return opened dictionary stream, transparently decompressing GZip inputs
     * @throws IOException if the file cannot be opened
     */
    private static InputStream openDictionaryInputStream(final Path path) throws IOException {
        final PushbackInputStream pushbackInputStream = new PushbackInputStream(
                new BufferedInputStream(Files.newInputStream(path)), 2);
        final byte[] header = pushbackInputStream.readNBytes(2);

        if (header.length > 0) {
            pushbackInputStream.unread(header);
        }

        if (header.length == 2 && (header[0] & 0xFF) == 0x1F && (header[1] & 0xFF) == 0x8B) {
            return new GZIPInputStream(pushbackInputStream);
        }

        return pushbackInputStream;
    }

    /**
     * Opens a bundled resource from the classpath.
     *
     * @param resourcePath classpath resource path
     * @return opened input stream
     * @throws IOException if the resource cannot be found
     */
    /* default */ static InputStream openBundledResource(final String resourcePath) throws IOException {
        final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        final InputStream inputStream = classLoader.getResourceAsStream(resourcePath);
        if (inputStream == null) {
            throw new IOException("Stemmer resource not found: " + resourcePath);
        }
        return new GZIPInputStream(inputStream);
    }
}