StemmerPatchTrieBinaryIO.java

/*******************************************************************************
 * Copyright (C) 2026, Leo Galambos
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************/
package org.egothor.stemmer;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/**
 * Binary persistence helper for patch-command stemmer tries.
 *
 * <p>
 * This class persists {@link FrequencyTrie} instances whose values are compact
 * patch commands represented as {@link String}. The serialized trie payload is
 * the native binary format of {@link FrequencyTrie}, wrapped in GZip
 * compression.
 *
 * <p>
 * The helper centralizes the codec and compression details so that higher-level
 * loader APIs can remain focused on source selection rather than stream
 * mechanics.
 *
 * <p>
 * Stream layering used by this class, from the caller's raw stream upwards:
 * raw stream, buffer, GZip, buffer, data stream. The buffer below the GZip
 * layer batches access to the raw stream; the buffer above it batches the
 * small primitive reads and writes issued through the data stream so that the
 * compressor and decompressor are driven with larger chunks.
 *
 * <p>
 * The class holds no mutable state of its own; all methods are static.
 */
public final class StemmerPatchTrieBinaryIO {

    /**
     * Logger of this class.
     */
    private static final Logger LOGGER = Logger.getLogger(StemmerPatchTrieBinaryIO.class.getName());

    /**
     * Value codec for persisted patch-command strings.
     */
    private static final FrequencyTrie.ValueStreamCodec<String> STRING_CODEC = new StringValueStreamCodec();

    /**
     * Utility class.
     *
     * @throws AssertionError always, to reject reflective instantiation
     */
    private StemmerPatchTrieBinaryIO() {
        throw new AssertionError("No instances.");
    }

    /**
     * Reads a GZip-compressed binary patch-command trie from a filesystem path.
     *
     * @param path source file
     * @return deserialized trie
     * @throws NullPointerException if {@code path} is {@code null}
     * @throws IOException          if reading or decompression fails
     */
    public static FrequencyTrie<String> read(final Path path) throws IOException {
        Objects.requireNonNull(path, "path");

        try (InputStream fileInputStream = Files.newInputStream(path)) {
            return read(fileInputStream);
        }
    }

    /**
     * Reads a GZip-compressed binary patch-command trie from a filesystem path
     * string.
     *
     * @param fileName source file name or path string
     * @return deserialized trie
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @throws IOException          if reading or decompression fails
     */
    public static FrequencyTrie<String> read(final String fileName) throws IOException {
        Objects.requireNonNull(fileName, "fileName");
        return read(Path.of(fileName));
    }

    /**
     * Reads a GZip-compressed binary patch-command trie from an input stream.
     *
     * <p>
     * The supplied stream is consumed but not interpreted as plain trie bytes; it
     * is first decompressed using {@link GZIPInputStream}.
     *
     * <p>
     * The supplied stream is closed when this method returns or throws after the
     * GZip header has been read, because closing the wrapping streams closes the
     * underlying stream as well.
     *
     * @param inputStream source stream
     * @return deserialized trie
     * @throws NullPointerException if {@code inputStream} is {@code null}
     * @throws IOException          if reading or decompression fails
     */
    public static FrequencyTrie<String> read(final InputStream inputStream) throws IOException {
        Objects.requireNonNull(inputStream, "inputStream");

        // The GZip stream is its own resource so that it is closed even if building
        // the upper layers fails. The buffer above it keeps the many small primitive
        // reads of DataInputStream from hitting the inflater one by one.
        try (GZIPInputStream gzipInputStream = new GZIPInputStream(new BufferedInputStream(inputStream));
                DataInputStream dataInputStream = new DataInputStream(new BufferedInputStream(gzipInputStream))) {
            final FrequencyTrie<String> trie = FrequencyTrie.readFrom(dataInputStream, String[]::new, STRING_CODEC);

            LOGGER.log(Level.FINE, "Read compressed binary stemmer trie.");
            return trie;
        }
    }

    /**
     * Writes a GZip-compressed binary patch-command trie to a filesystem path.
     *
     * <p>
     * Missing parent directories of the target are created first. The target is
     * opened with the default options of {@link Files#newOutputStream}.
     *
     * @param trie trie to persist
     * @param path target file
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if writing fails
     */
    public static void write(final FrequencyTrie<String> trie, final Path path) throws IOException {
        Objects.requireNonNull(trie, "trie");
        Objects.requireNonNull(path, "path");

        // A relative single-segment path has no parent until made absolute.
        final Path parent = path.toAbsolutePath().getParent();
        if (parent != null) {
            Files.createDirectories(parent);
        }

        try (OutputStream fileOutputStream = Files.newOutputStream(path)) {
            write(trie, fileOutputStream);
        }
    }

    /**
     * Writes a GZip-compressed binary patch-command trie to a filesystem path
     * string.
     *
     * @param trie     trie to persist
     * @param fileName target file name or path string
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if writing fails
     */
    public static void write(final FrequencyTrie<String> trie, final String fileName) throws IOException {
        // Validate both arguments before parsing the path so that a null trie is
        // always reported as NullPointerException, even for an unparsable path string.
        Objects.requireNonNull(trie, "trie");
        Objects.requireNonNull(fileName, "fileName");
        write(trie, Path.of(fileName));
    }

    /**
     * Writes a GZip-compressed binary patch-command trie to an output stream.
     *
     * <p>
     * The supplied stream is closed when this method returns or throws after the
     * GZip header has been written; closing the GZip layer is what completes the
     * compressed data.
     *
     * @param trie         trie to persist
     * @param outputStream target stream
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException          if writing fails
     */
    public static void write(final FrequencyTrie<String> trie, final OutputStream outputStream) throws IOException {
        Objects.requireNonNull(trie, "trie");
        Objects.requireNonNull(outputStream, "outputStream");

        // The buffer above the GZip stream collects the small primitive writes of
        // DataOutputStream into larger chunks before they reach the deflater; the
        // buffer below it batches the compressed output towards the raw stream.
        // Closing the data stream flushes the upper buffer before the GZip stream
        // is finished.
        try (GZIPOutputStream gzipOutputStream = new GZIPOutputStream(new BufferedOutputStream(outputStream));
                DataOutputStream dataOutputStream = new DataOutputStream(new BufferedOutputStream(gzipOutputStream))) {
            trie.writeTo(dataOutputStream, STRING_CODEC);
        }

        LOGGER.log(Level.FINE, "Wrote compressed binary stemmer trie.");
    }

    /**
     * Binary stream codec for persisted patch-command strings.
     *
     * <p>
     * Values are stored with {@link DataOutputStream#writeUTF(String)} and read
     * back with {@link DataInputStream#readUTF()}. As a consequence a value must
     * not be {@code null} and its encoded form must not exceed 65535 bytes.
     */
    private static final class StringValueStreamCodec implements FrequencyTrie.ValueStreamCodec<String> {

        /**
         * Creates a codec instance.
         */
        private StringValueStreamCodec() {
        }

        /**
         * Writes one patch-command string.
         *
         * @param dataOutput target data stream
         * @param value      string to write
         * @throws IOException if writing fails or the encoded value is too long
         */
        @Override
        public void write(final DataOutputStream dataOutput, final String value) throws IOException {
            dataOutput.writeUTF(value);
        }

        /**
         * Reads one patch-command string.
         *
         * @param dataInput source data stream
         * @return decoded string
         * @throws IOException if reading fails or the stream ends prematurely
         */
        @Override
        public String read(final DataInputStream dataInput) throws IOException {
            return dataInput.readUTF();
        }
    }
}