LocalValueSummary.java

/*******************************************************************************
 * Copyright (C) 2026, Leo Galambos
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************/
package org.egothor.stemmer.trie;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.IntFunction;

import org.egothor.stemmer.ReductionSettings;

/**
 * Local terminal value summary of a node.
 *
 * @param <V> value type
 */
public final class LocalValueSummary<V> {

    /**
     * Locally stored values ordered by descending frequency.
     */
    private final V[] orderedValues;

    /**
     * Frequencies aligned with {@link #orderedValues}.
     */
    private final int[] orderedCounts;

    /**
     * Total local frequency.
     */
    private final int totalCount;

    /**
     * Winning value, or {@code null} if the node has no local value.
     */
    /* default */ final V dominantValue;

    /**
     * Winning value frequency.
     */
    private final int dominantCount;

    /**
     * Second best value frequency.
     */
    private final int secondCount;

    /**
     * Creates a summary.
     *
     * @param orderedValues ordered values
     * @param orderedCounts ordered counts
     * @param totalCount    total count
     * @param dominantValue dominant value
     * @param dominantCount dominant count
     * @param secondCount   second count
     */
    public LocalValueSummary(final V[] orderedValues, final int[] orderedCounts, final int totalCount,
            final V dominantValue, final int dominantCount, final int secondCount) {
        this.orderedValues = orderedValues;
        this.orderedCounts = orderedCounts;
        this.totalCount = totalCount;
        this.dominantValue = dominantValue;
        this.dominantCount = dominantCount;
        this.secondCount = secondCount;
    }

    /**
     * Builds a summary from local counts.
     *
     * @param counts       local counts
     * @param arrayFactory array factory
     * @param <V>          value type
     * @return summary
     */
    public static <V> LocalValueSummary<V> of(final Map<V, Integer> counts, final IntFunction<V[]> arrayFactory) {
        final List<SortableValue<V>> entries = new ArrayList<>(counts.size());
        int insertionOrder = 0;
        for (Map.Entry<V, Integer> entry : counts.entrySet()) {
            entries.add(new SortableValue<>(entry.getKey(), entry.getValue(), String.valueOf(entry.getKey()),
                    insertionOrder++));
        }

        entries.sort((left, right) -> {
            final int frequencyCompare = Integer.compare(right.count(), left.count());
            if (frequencyCompare != 0) {
                return frequencyCompare;
            }

            final int lengthCompare = Integer.compare(left.textLength(), right.textLength());
            if (lengthCompare != 0) {
                return lengthCompare;
            }

            final int textCompare = left.text().compareTo(right.text());
            if (textCompare != 0) {
                return textCompare;
            }

            return Integer.compare(left.insertionOrder(), right.insertionOrder());
        });

        final V[] orderedValues = arrayFactory.apply(entries.size());
        final int[] orderedCounts = new int[entries.size()];

        int totalCount = 0;
        for (int index = 0; index < entries.size(); index++) {
            final SortableValue<V> entry = entries.get(index);
            orderedValues[index] = entry.value();
            orderedCounts[index] = entry.count();
            totalCount += orderedCounts[index];
        }

        final V dominantValue = orderedValues.length == 0 ? null : orderedValues[0];
        final int dominantCount = orderedCounts.length == 0 ? 0 : orderedCounts[0];
        final int secondCount = orderedCounts.length < 2 ? 0 : orderedCounts[1];

        return new LocalValueSummary<>(orderedValues, orderedCounts, totalCount, dominantValue, dominantCount,
                secondCount);
    }

    /**
     * Returns ordered values.
     *
     * @return ordered values
     */
    @SuppressWarnings("PMD.MethodReturnsInternalArray")
    public V[] orderedValues() {
        return this.orderedValues;
    }

    /**
     * Returns ordered counts.
     *
     * @return ordered counts
     */
    @SuppressWarnings("PMD.MethodReturnsInternalArray")
    public int[] orderedCounts() {
        return this.orderedCounts;
    }

    /**
     * Indicates whether the dominant value satisfies the configured dominance
     * constraints.
     *
     * @param settings reduction settings
     * @return {@code true} if dominant, otherwise {@code false}
     */
    /* default */ boolean hasQualifiedDominantWinner(final ReductionSettings settings) {
        if (this.dominantValue == null) {
            return false;
        }

        final int thresholdPercent = settings.dominantWinnerMinPercent();
        final int ratio = settings.dominantWinnerOverSecondRatio();

        final boolean percentSatisfied = this.dominantCount * 100L >= (long) this.totalCount * thresholdPercent;

        final boolean ratioSatisfied;
        if (this.secondCount == 0) {
            ratioSatisfied = true;
        } else {
            ratioSatisfied = this.dominantCount >= (long) this.secondCount * ratio;
        }

        return percentSatisfied && ratioSatisfied;
    }
}