/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.nlp.tokenizer.word;

import ai.grazie.nlp.tokenizer.Tokenizer;
import java.util.ArrayList;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.ranges.IntRange;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={1, 4, 2}, bv={1, 0, 3}, k=1, d1={"\u00000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000b\n\u0002\b\u0004\n\u0002\u0010\f\n\u0002\b\u0002\n\u0002\u0010\b\n\u0000\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0000\u0018\u00002\u00020\u0001B\u0019\u0012\b\b\u0002\u0010\u0002\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0004\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0005J\u0010\u0010\u0006\u001a\u00020\u00032\u0006\u0010\u0007\u001a\u00020\bH\u0002J\u0018\u0010\t\u001a\u00020\u00032\u0006\u0010\n\u001a\u00020\u000b2\u0006\u0010\u0007\u001a\u00020\bH\u0002J\u0016\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u000e0\r2\u0006\u0010\u000f\u001a\u00020\u0010H\u0016R\u000e\u0010\u0004\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0011"}, d2={"Lai/grazie/nlp/tokenizer/word/HeuristicWordTokenizer;", "Lai/grazie/nlp/tokenizer/Tokenizer;", "tokenizeByCamelCase", "", "tokenizeByApostrophe", "(ZZ)V", "isHieroglyphic", "char", "", "isNonSplittingPart", "type", "", "tokenize", "", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "text", "", "nlp-tokenizer"})
public final class HeuristicWordTokenizer
implements Tokenizer {
    private final boolean tokenizeByCamelCase;
    private final boolean tokenizeByApostrophe;

    /*
     * Unable to fully structure code
     */
    @Override
    @NotNull
    public List<Tokenizer.Token> tokenize(@NotNull String text) {
        block20: {
            Intrinsics.checkNotNullParameter((Object)text, (String)"text");
            var2_2 = text;
            var3_3 = false;
            if (var2_2.length() == 0) {
                return CollectionsKt.emptyList();
            }
            $this$all$iv = text;
            $i$f$all = false;
            var4_4 = $this$all$iv;
            for (var5_6 = 0; var5_6 < var4_4.length(); ++var5_6) {
                it = element$iv = var4_4.charAt(var5_6);
                $i$a$-all-HeuristicWordTokenizer$tokenize$1 = false;
                var9_12 = it;
                var10_18 = false;
                if (!Character.isLowerCase(var9_12) && this.tokenizeByCamelCase) ** GOTO lbl-1000
                var9_12 = it;
                var10_18 = false;
                if (Character.isLetter(var9_12)) {
                    v0 = true;
                } else lbl-1000:
                // 2 sources

                {
                    v0 = false;
                }
                if (v0) continue;
                v1 = false;
                break block20;
            }
            v1 = true;
        }
        if (v1) {
            return CollectionsKt.listOf((Object)new Tokenizer.Token(text, new IntRange(0, text.length() - 1)));
        }
        result = new ArrayList<Tokenizer.Token>();
        index = 0;
        start = -1;
        prevType = 25;
        prevChar = '_';
        while (index < text.length()) {
            curChar = text.charAt(index);
            if (this.isHieroglyphic(curChar)) {
                if (start >= 0) {
                    $i$a$-all-HeuristicWordTokenizer$tokenize$1 = text;
                    var9_13 = false;
                    v2 = $i$a$-all-HeuristicWordTokenizer$tokenize$1.substring(start, index);
                    Intrinsics.checkNotNullExpressionValue((Object)v2, (String)"(this as java.lang.Strin\u2026ing(startIndex, endIndex)");
                    result.add(new Tokenizer.Token(v2, new IntRange(start, index - 1)));
                    start = -1;
                }
                prevType = 25;
                ++index;
                continue;
            }
            curType = Character.getType(curChar);
            if (this.isNonSplittingPart(curType, curChar)) {
                if (!this.isNonSplittingPart(prevType, prevChar) && start >= 0) {
                    var9_14 = text;
                    var10_18 = false;
                    v3 = var9_14.substring(start, index);
                    Intrinsics.checkNotNullExpressionValue((Object)v3, (String)"(this as java.lang.Strin\u2026ing(startIndex, endIndex)");
                    result.add(new Tokenizer.Token(v3, new IntRange(start, index - 1)));
                    start = -1;
                }
                if (start < 0) {
                    start = index;
                } else if (this.tokenizeByCamelCase && curType == 1 && prevType == 2) {
                    var9_15 = text;
                    var10_18 = false;
                    v4 = var9_15.substring(start, index);
                    Intrinsics.checkNotNullExpressionValue((Object)v4, (String)"(this as java.lang.Strin\u2026ing(startIndex, endIndex)");
                    result.add(new Tokenizer.Token(v4, new IntRange(start, index - 1)));
                    start = index;
                } else if (this.tokenizeByCamelCase && index - start > 1 && curType == 2 && prevType == 1) {
                    var9_16 = text;
                    var10_18 = false;
                    v5 = var9_16.substring(start, index);
                    Intrinsics.checkNotNullExpressionValue((Object)v5, (String)"(this as java.lang.Strin\u2026ing(startIndex, endIndex)");
                    result.add(new Tokenizer.Token(v5, new IntRange(start, index - 1)));
                    start = index - 1;
                }
            } else if (start >= 0 && (this.isNonSplittingPart(prevType, prevChar) || prevType != curType)) {
                var9_17 = text;
                var10_18 = false;
                v6 = var9_17.substring(start, index);
                Intrinsics.checkNotNullExpressionValue((Object)v6, (String)"(this as java.lang.Strin\u2026ing(startIndex, endIndex)");
                result.add(new Tokenizer.Token(v6, new IntRange(start, index - 1)));
                start = index;
            } else if (start < 0) {
                start = index;
            }
            prevType = curType;
            prevChar = curChar;
            ++index;
        }
        if (start >= 0) {
            var7_9 = text;
            var8_10 = false;
            v7 = var7_9.substring(start, index);
            Intrinsics.checkNotNullExpressionValue((Object)v7, (String)"(this as java.lang.Strin\u2026ing(startIndex, endIndex)");
            result.add(new Tokenizer.Token(v7, new IntRange(start, index - 1)));
        }
        return result;
    }

    private final boolean isNonSplittingPart(int type, char c) {
        return type == 2 || type == 1 || type == 3 || type == 5 || type == 4 || !this.tokenizeByApostrophe && c == '\'';
    }

    /*
     * Enabled force condition propagation
     * Lifted jumps to return sites
     */
    private final boolean isHieroglyphic(char c) {
        char c2 = c;
        if ('\u3040' <= c2) {
            if ('\u309f' >= c2) return true;
        }
        c2 = c;
        if ('\u30a0' <= c2) {
            if ('\u30ff' >= c2) return true;
        }
        c2 = c;
        if ('\u4e00' <= c2) {
            if ('\u9fff' >= c2) return true;
        }
        c2 = c;
        if ('\uf900' <= c2) {
            if ('\ufaff' >= c2) return true;
        }
        c2 = c;
        if ('\uff00' > c2) return false;
        if ('\uffef' < c2) return false;
        return true;
    }

    public HeuristicWordTokenizer(boolean tokenizeByCamelCase, boolean tokenizeByApostrophe) {
        this.tokenizeByCamelCase = tokenizeByCamelCase;
        this.tokenizeByApostrophe = tokenizeByApostrophe;
    }

    public /* synthetic */ HeuristicWordTokenizer(boolean bl, boolean bl2, int n, DefaultConstructorMarker defaultConstructorMarker) {
        if ((n & 1) != 0) {
            bl = false;
        }
        if ((n & 2) != 0) {
            bl2 = true;
        }
        this(bl, bl2);
    }

    public HeuristicWordTokenizer() {
        this(false, false, 3, null);
    }
}

