/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.detector.ngram;

import ai.grazie.detector.ngram.NgramEnumeratorKt;
import ai.grazie.detector.ngram.impl.NgramExtractor;
import ai.grazie.detector.ngram.profiles.LanguageProfile;
import ai.grazie.detector.utils.filter.AggregatedTextFilter;
import ai.grazie.detector.utils.filter.TextFilter;
import ai.grazie.nlp.langs.Language;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Stream;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.comparisons.ComparisonsKt;
import kotlin.io.FilesKt;
import kotlin.jdk7.AutoCloseableKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.functions.Function2;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.CharsKt;
import kotlin.text.Regex;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={2, 1, 0}, k=2, xi=48, d1={"\u0000\u001a\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010\u0002\n\u0002\b\u0004\n\u0002\u0010$\n\u0002\u0010\b\n\u0000\u001a\u0006\u0010\u0002\u001a\u00020\u0003\u001a\b\u0010\u0004\u001a\u00020\u0003H\u0002\u001a$\u0010\u0005\u001a\u00020\u00032\u0006\u0010\u0006\u001a\u00020\u00012\u0012\u0010\u0007\u001a\u000e\u0012\u0004\u0012\u00020\u0001\u0012\u0004\u0012\u00020\t0\bH\u0002\"\u000e\u0010\u0000\u001a\u00020\u0001X\u0082T\u00a2\u0006\u0002\n\u0000\u00a8\u0006\n"}, d2={"ngramsLocation", "", "main", "", "generateNgrams", "saveNgram", "iso", "freq", "", "", "nlp-detect"})
@SourceDebugExtension(value={"SMAP\nNgramEnumerator.kt\nKotlin\n*S Kotlin\n*F\n+ 1 NgramEnumerator.kt\nai/grazie/detector/ngram/NgramEnumeratorKt\n+ 2 Maps.kt\nkotlin/collections/MapsKt__MapsKt\n+ 3 _Sequences.kt\nkotlin/sequences/SequencesKt___SequencesKt\n+ 4 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 5 _Strings.kt\nkotlin/text/StringsKt___StringsKt\n*L\n1#1,70:1\n535#2:71\n520#2,6:72\n607#3:78\n607#3:79\n1317#3,2:80\n1863#4:82\n1864#4:85\n1069#5,2:83\n*S KotlinDebug\n*F\n+ 1 NgramEnumerator.kt\nai/grazie/detector/ngram/NgramEnumeratorKt\n*L\n51#1:71\n51#1:72,6\n60#1:78\n61#1:79\n62#1:80,2\n41#1:82\n41#1:85\n42#1:83,2\n*E\n"})
public final class NgramEnumeratorKt {
    @NotNull
    private static final String ngramsLocation = "src/commonMain/resources/ngrams";

    public static final void main() {
        NgramEnumeratorKt.generateNgrams();
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     * WARNING - void declaration
     */
    private static final void generateNgrams() {
        void $this$forEach$iv;
        Sequence $this$sortedBy$iv;
        Sequence $this$filterTo$iv$iv;
        String corpusPath = "path_to_corpus";
        Language lang = Language.UKRAINIAN;
        int normTrigramsBy = 200;
        Regex nonLetter = new Regex("[^\\p{javaLetter}]+");
        Regex multipleSpace = new Regex("\\s{2,}");
        ConcurrentHashMap ngramCounts = new ConcurrentHashMap();
        AutoCloseable autoCloseable = Files.walk(Path.of(corpusPath, new String[0]), new FileVisitOption[0]);
        Throwable throwable = null;
        try {
            Stream files = (Stream)autoCloseable;
            boolean bl = false;
            files.forEach(arg_0 -> NgramEnumeratorKt.generateNgrams$lambda$7$lambda$6(arg_0 -> NgramEnumeratorKt.generateNgrams$lambda$7$lambda$5(nonLetter, multipleSpace, ngramCounts, lang, arg_0), arg_0));
            files = Unit.INSTANCE;
        }
        catch (Throwable bl) {
            throwable = bl;
            throw bl;
        }
        finally {
            AutoCloseableKt.closeFinally((AutoCloseable)autoCloseable, (Throwable)throwable);
        }
        Map $this$filter$iv = ngramCounts;
        boolean $i$f$filter = false;
        Map bl = $this$filter$iv;
        Map destination$iv$iv = new LinkedHashMap();
        boolean $i$f$filterTo = false;
        Iterator iterator = $this$filterTo$iv$iv.entrySet().iterator();
        while (iterator.hasNext()) {
            Map.Entry element$iv$iv;
            Map.Entry entry2 = element$iv$iv = iterator.next();
            boolean bl2 = false;
            String k = (String)entry2.getKey();
            if (!(k.length() == 3)) continue;
            destination$iv$iv.put(element$iv$iv.getKey(), element$iv$iv.getValue());
        }
        int maxValue = ((Number)((Object)CollectionsKt.maxOrThrow((Iterable)destination$iv$iv.values()))).intValue();
        int divideBy = maxValue / normTrigramsBy;
        System.out.println((Object)("Found " + ngramCounts.size() + " ngrams, max trigram frequency " + maxValue + ". Dividing everything by " + divideBy + " for smaller file size"));
        LinkedHashMap sortedMap = new LinkedHashMap();
        $this$filterTo$iv$iv = SequencesKt.filter((Sequence)SequencesKt.map((Sequence)MapsKt.asSequence((Map)ngramCounts), arg_0 -> NgramEnumeratorKt.generateNgrams$lambda$9(divideBy, arg_0)), NgramEnumeratorKt::generateNgrams$lambda$10);
        boolean $i$f$sortedBy = false;
        $this$sortedBy$iv = SequencesKt.sortedWith((Sequence)$this$sortedBy$iv, (Comparator)new Comparator(){

            public final int compare(T a, T b) {
                Pair it = (Pair)a;
                boolean bl = false;
                Comparable comparable = (Comparable)((Object)((String)it.getFirst()));
                it = (Pair)b;
                Comparable comparable2 = comparable;
                bl = false;
                return ComparisonsKt.compareValues((Comparable)comparable2, (Comparable)((Comparable)((Object)((String)it.getFirst()))));
            }
        });
        $i$f$sortedBy = false;
        $this$sortedBy$iv = SequencesKt.sortedWith((Sequence)$this$sortedBy$iv, (Comparator)new Comparator(){

            public final int compare(T a, T b) {
                Pair it = (Pair)a;
                boolean bl = false;
                Comparable comparable = Integer.valueOf(((String)it.getFirst()).length());
                it = (Pair)b;
                Comparable comparable2 = comparable;
                bl = false;
                return ComparisonsKt.compareValues((Comparable)comparable2, (Comparable)Integer.valueOf(((String)it.getFirst()).length()));
            }
        });
        boolean $i$f$forEach = false;
        for (Object element$iv : $this$forEach$iv) {
            Pair pair = (Pair)element$iv;
            boolean bl3 = false;
            String k = (String)pair.component1();
            int v = ((Number)pair.component2()).intValue();
            Integer n = v;
            ((Map)sortedMap).put(k, n);
        }
        NgramEnumeratorKt.saveNgram(lang.getIso().toString(), sortedMap);
    }

    private static final void saveNgram(String iso, Map<String, Integer> freq) {
        FilesKt.writeText$default((File)new File("src/commonMain/resources/ngrams/" + iso), (String)LanguageProfile.Companion.serialize(freq), null, (int)2, null);
    }

    public static /* synthetic */ void main(String[] args) {
        NgramEnumeratorKt.main();
    }

    private static final Integer generateNgrams$lambda$7$lambda$5$lambda$3$lambda$2$lambda$1(Function2 $tmp0, Object p0, Object p1) {
        return (Integer)$tmp0.invoke(p0, p1);
    }

    private static final Unit generateNgrams$lambda$7$lambda$5$lambda$3(Regex $nonLetter, Regex $multipleSpace, ConcurrentHashMap $ngramCounts, Language $lang, String line) {
        AggregatedTextFilter aggregatedTextFilter = TextFilter.Companion.getDefault();
        Intrinsics.checkNotNull((Object)line);
        CharSequence charSequence = aggregatedTextFilter.filter(line);
        String string = " ";
        charSequence = $nonLetter.replace(charSequence, string);
        string = " ";
        String clean = $multipleSpace.replace(charSequence, string);
        Iterable $this$forEach$iv = NgramExtractor.Companion.getStandard().extract(clean);
        boolean $i$f$forEach = false;
        for (Object element$iv : $this$forEach$iv) {
            boolean bl;
            String ngram;
            block2: {
                ngram = (String)element$iv;
                boolean bl2 = false;
                CharSequence $this$all$iv = ngram;
                boolean $i$f$all = false;
                for (int i = 0; i < $this$all$iv.length(); ++i) {
                    char element$iv2;
                    char c = element$iv2 = $this$all$iv.charAt(i);
                    boolean bl3 = false;
                    if (CharsKt.isWhitespace((char)c) || $lang.getAlphabet().matchEntire(String.valueOf(c))) continue;
                    bl = false;
                    break block2;
                }
                bl = true;
            }
            if (!bl) continue;
            $ngramCounts.merge(ngram, 1, (arg_0, arg_1) -> NgramEnumeratorKt.generateNgrams$lambda$7$lambda$5$lambda$3$lambda$2$lambda$1(generateNgrams.1.1.1.1.2.INSTANCE, arg_0, arg_1));
        }
        return Unit.INSTANCE;
    }

    private static final void generateNgrams$lambda$7$lambda$5$lambda$4(Function1 $tmp0, Object p0) {
        $tmp0.invoke(p0);
    }

    private static final Unit generateNgrams$lambda$7$lambda$5(Regex $nonLetter, Regex $multipleSpace, ConcurrentHashMap $ngramCounts, Language $lang, Path file) {
        if (Files.isRegularFile(file, new LinkOption[0])) {
            ((Stream)Files.lines(file).parallel()).forEach(arg_0 -> NgramEnumeratorKt.generateNgrams$lambda$7$lambda$5$lambda$4(arg_0 -> NgramEnumeratorKt.generateNgrams$lambda$7$lambda$5$lambda$3($nonLetter, $multipleSpace, $ngramCounts, $lang, arg_0), arg_0));
        }
        return Unit.INSTANCE;
    }

    private static final void generateNgrams$lambda$7$lambda$6(Function1 $tmp0, Object p0) {
        $tmp0.invoke(p0);
    }

    private static final Pair generateNgrams$lambda$9(int $divideBy, Map.Entry entry2) {
        Intrinsics.checkNotNullParameter((Object)entry2, (String)"<destruct>");
        String k = (String)entry2.getKey();
        int v = ((Number)entry2.getValue()).intValue();
        return TuplesKt.to((Object)k, (Object)(v / $divideBy));
    }

    private static final boolean generateNgrams$lambda$10(Pair it) {
        Intrinsics.checkNotNullParameter((Object)it, (String)"it");
        return ((Number)it.getSecond()).intValue() > 1;
    }
}

