/*
 * Decompiled with CFR 0.152.
 */
package com.twitter.penguin.korean.v1.normalizer;

import com.twitter.penguin.korean.util.CharArraySet;
import com.twitter.penguin.korean.v1.util.Hangul$;
import com.twitter.penguin.korean.v1.util.KoreanDictionaryProvider$;
import com.twitter.penguin.korean.v1.util.KoreanPos$;
import java.util.regex.Matcher;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.None$;
import scala.Predef;
import scala.Predef$;
import scala.Serializable;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.immutable.Map;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.runtime.AbstractFunction1;
import scala.runtime.AbstractFunction2;
import scala.runtime.BoxesRunTime;
import scala.util.matching.Regex;

public final class KoreanNormalizer$ {
    /** The single instance of this class; the private constructor stores {@code this} here. */
    public static final KoreanNormalizer$ MODULE$;
    /**
     * Matches one run of characters in U+3131..U+3163 or U+AC00..U+D7A3 (pattern built in the
     * constructor). The spelling "EXTENTED" is kept as found.
     */
    private final Regex EXTENTED_KOREAN_REGEX;
    /**
     * Matches a run of U+AC00..U+D7A3 characters (group 1) followed by a run of U+314B, a run of
     * U+314E, or a run of U+3160/U+315C characters (group 2).
     */
    private final Regex KOREAN_TO_NORMALIZE_REGEX;
    /** Matches one character repeated three or more times, or two or more U+3160/U+315C characters. */
    private final Regex REPEATING_CHAR_REGEX;
    /** Matches a two-character sequence repeated three or more times. */
    private final Regex REPEATING_2CHAR_REGEX;
    /** Matches one or more whitespace characters. */
    private final Regex WHITESPACE_REGEX;
    /**
     * Characters that make {@code normalizeCodaN} return its input unchanged when they appear as
     * the second-to-last character. The spelling "EXCPETION" is part of the public accessor name.
     */
    private final Set<Object> CODA_N_EXCPETION;

    // Instantiates the class once during class initialisation; the constructor publishes the
    // instance through MODULE$.
    static {
        new KoreanNormalizer$();
    }

    /**
     * Accessor for the repeated-character pattern.
     *
     * @return the {@link Regex} stored in the {@code REPEATING_CHAR_REGEX} field
     */
    public Regex REPEATING_CHAR_REGEX() {
        final Regex pattern = REPEATING_CHAR_REGEX;
        return pattern;
    }

    /**
     * Accessor for the set of characters that {@code normalizeCodaN} never rewrites.
     *
     * @return the set stored in the {@code CODA_N_EXCPETION} field
     */
    public Set<Object> CODA_N_EXCPETION() {
        final Set<Object> exceptions = CODA_N_EXCPETION;
        return exceptions;
    }

    /**
     * Normalizes every run of Korean characters found in {@code input}.
     *
     * <p>Each match of {@code EXTENTED_KOREAN_REGEX} is replaced by the result of running the
     * matched text through the chunk normalizer; text outside the matches is left untouched.
     *
     * @param input text to normalize
     * @return the text with each matched run replaced by its normalized form
     */
    public CharSequence normalize(CharSequence input) {
        // The replacer has to be a real scala.Function1. An anonymous class that only implements
        // Serializable cannot be cast to Function1 (ClassCastException), so AbstractFunction1 is
        // extended instead.
        Function1<Regex.Match, String> normalizeMatch = new AbstractFunction1<Regex.Match, String>() {
            @Override
            public String apply(Regex.Match m) {
                String normalized = KoreanNormalizer$.MODULE$
                        .com$twitter$penguin$korean$v1$normalizer$KoreanNormalizer$$normalizeKoreanChunk(m.group(0))
                        .toString();
                // The chunk normalizer applies typo-dictionary replacements, so the result is
                // quoted to keep '$' and '\' from being read as group references, matching what
                // the other replacers in this class already do.
                return Matcher.quoteReplacement(normalized);
            }
        };
        return this.EXTENTED_KOREAN_REGEX.replaceAllIn(input, normalizeMatch);
    }

    /**
     * Normalizes a single chunk of Korean text by applying these steps in order:
     * <ol>
     *   <li>every match of {@code KOREAN_TO_NORMALIZE_REGEX} is rewritten by
     *       {@code processNormalizationCandidate};</li>
     *   <li>every match of {@code REPEATING_CHAR_REGEX} is cut down to its first two characters;</li>
     *   <li>every match of {@code REPEATING_2CHAR_REGEX} is cut down to its first four characters;</li>
     *   <li>{@code normalizeCodaN} is applied;</li>
     *   <li>{@code correctTypo} is applied;</li>
     *   <li>every match of {@code WHITESPACE_REGEX} is replaced by a single space.</li>
     * </ol>
     *
     * @param input the chunk to normalize
     * @return the normalized chunk
     */
    public CharSequence com$twitter$penguin$korean$v1$normalizer$KoreanNormalizer$$normalizeKoreanChunk(CharSequence input) {
        // Replacers must be real scala.Function1 instances; an anonymous class that only
        // implements Serializable cannot be cast to Function1 (ClassCastException).
        Function1<Regex.Match, String> normalizeEnding = new AbstractFunction1<Regex.Match, String>() {
            @Override
            public String apply(Regex.Match m) {
                String replaced = KoreanNormalizer$.MODULE$
                        .com$twitter$penguin$korean$v1$normalizer$KoreanNormalizer$$processNormalizationCandidate(m)
                        .toString();
                // Quoted for consistency with the two truncating replacers below.
                return Matcher.quoteReplacement(replaced);
            }
        };

        String endingNormalized = this.KOREAN_TO_NORMALIZE_REGEX.replaceAllIn(input, normalizeEnding);
        String exclamationNormalized =
                this.REPEATING_CHAR_REGEX().replaceAllIn((CharSequence) endingNormalized, keepPrefixOfMatch(2));
        String repeatingNormalized =
                this.REPEATING_2CHAR_REGEX.replaceAllIn((CharSequence) exclamationNormalized, keepPrefixOfMatch(4));
        CharSequence codaNNormalized = this.normalizeCodaN(repeatingNormalized);
        CharSequence typoCorrected = this.correctTypo(codaNNormalized);
        return this.WHITESPACE_REGEX.replaceAllIn(typoCorrected, " ");
    }

    /** Builds a replacer that keeps at most the first {@code keep} characters of each match. */
    private static Function1<Regex.Match, String> keepPrefixOfMatch(final int keep) {
        return new AbstractFunction1<Regex.Match, String>() {
            @Override
            public String apply(Regex.Match m) {
                String matched = m.group(0);
                String prefix = matched.substring(0, Math.min(keep, matched.length()));
                return Matcher.quoteReplacement(prefix);
            }
        };
    }

    /**
     * Replaces known typos in {@code chunk2} using the typo dictionary.
     *
     * <p>The dictionary returned by {@code typoDictionaryByLength()} is folded over: for every
     * (word length, typo map) entry, each window of that length taken from the text as it stood
     * before the entry was applied is looked up in the typo map, and every occurrence of a window
     * that is a key is replaced by the mapped value.
     *
     * <p>Keys and values are treated as literal text, not as a regular expression and a
     * replacement template. A non-{@code String} argument is converted with {@code toString()}
     * rather than rejected.
     *
     * @param chunk2 the text to correct
     * @return the corrected text; {@code chunk2} itself when the dictionary has no entries
     * @throws MatchError if the accumulated text, a dictionary entry or its typo map is
     *         {@code null}
     */
    public CharSequence correctTypo(CharSequence chunk2) {
        // Function objects must be real scala.Function2 instances; an anonymous class that only
        // implements Serializable cannot be cast to Function2 (ClassCastException).
        Function2<CharSequence, Tuple2<Object, Map<String, String>>, CharSequence> correctForLength =
                new AbstractFunction2<CharSequence, Tuple2<Object, Map<String, String>>, CharSequence>() {
                    @Override
                    public CharSequence apply(CharSequence output, Tuple2<Object, Map<String, String>> entry) {
                        if (output == null || entry == null || entry._2() == null) {
                            throw new MatchError((Object) new Tuple2<Object, Object>(output, entry));
                        }
                        final String text = output.toString();
                        final int wordLen = entry._1$mcI$sp();
                        final Map<String, String> typoMap = entry._2();

                        Function2<String, String, String> fixSlice = new AbstractFunction2<String, String, String>() {
                            @Override
                            public String apply(String sliceOutput, String slice) {
                                if (sliceOutput == null || slice == null) {
                                    throw new MatchError((Object) new Tuple2<Object, Object>(sliceOutput, slice));
                                }
                                if (!typoMap.contains(slice)) {
                                    return sliceOutput;
                                }
                                // Literal replacement: dictionary text must not be read as a regex.
                                return sliceOutput.replace(slice, (String) typoMap.apply(slice));
                            }
                        };

                        // Windows come from the text before this entry's replacements, while the
                        // replacements accumulate on the running result.
                        return (CharSequence) new StringOps(Predef$.MODULE$.augmentString(text))
                                .sliding(wordLen)
                                .foldLeft((Object) text, (Function2) fixSlice);
                    }
                };
        return (CharSequence) KoreanDictionaryProvider$.MODULE$.typoDictionaryByLength()
                .foldLeft((Object) chunk2, (Function2) correctForLength);
    }

    /**
     * Rewrites a chunk whose second-to-last syllable ends in the coda U+3134 and whose last
     * character is U+B370, U+AC00 or U+C9C0.
     *
     * <p>The chunk is returned unchanged when any of the following holds:
     * <ul>
     *   <li>it is shorter than two characters;</li>
     *   <li>the Noun, Conjunction or Adverb dictionary contains the whole chunk;</li>
     *   <li>the Noun dictionary contains its last two characters;</li>
     *   <li>the second-to-last character lies outside U+AC00..U+D7A3 or is listed in
     *       {@code CODA_N_EXCPETION};</li>
     *   <li>the coda / last-character condition above is not met;</li>
     *   <li>the chunk without its last character and without that coda is not in the Noun
     *       dictionary.</li>
     * </ul>
     * Otherwise the result is that stripped stem, followed by U+C740 when the stripped syllable's
     * vowel is U+3161 and by U+C778 when it is not, followed by the original last character.
     *
     * @param chunk2 the chunk to inspect
     * @return the rewritten chunk, or {@code chunk2} itself when no rewrite applies
     * @throws MatchError if {@code decomposeHangul} returns {@code null}
     */
    public CharSequence normalizeCodaN(CharSequence chunk2) {
        final int length = chunk2.length();
        if (length < 2) {
            return chunk2;
        }

        final CharSequence tail = chunk2.subSequence(length - 2, length);
        final char finalChar = chunk2.charAt(length - 1);
        final char tailHead = tail.charAt(0);

        // Guard clauses, evaluated in the same order as the original short-circuit chain.
        if (((CharArraySet) KoreanDictionaryProvider$.MODULE$.koreanDictionary().apply((Object) KoreanPos$.MODULE$.Noun())).contains(chunk2)) {
            return chunk2;
        }
        if (((CharArraySet) KoreanDictionaryProvider$.MODULE$.koreanDictionary().apply((Object) KoreanPos$.MODULE$.Conjunction())).contains(chunk2)) {
            return chunk2;
        }
        if (((CharArraySet) KoreanDictionaryProvider$.MODULE$.koreanDictionary().apply((Object) KoreanPos$.MODULE$.Adverb())).contains(chunk2)) {
            return chunk2;
        }
        if (((CharArraySet) KoreanDictionaryProvider$.MODULE$.koreanDictionary().apply((Object) KoreanPos$.MODULE$.Noun())).contains(tail)) {
            return chunk2;
        }
        if (tailHead < '\uac00' || tailHead > '\ud7a3') {
            return chunk2;
        }
        if (this.CODA_N_EXCPETION().contains((Object) BoxesRunTime.boxToCharacter((char) tailHead))) {
            return chunk2;
        }

        final Tuple3<Object, Object, Object> parts = Hangul$.MODULE$.decomposeHangul(tailHead);
        if (parts == null) {
            throw new MatchError(parts);
        }
        final char onset = BoxesRunTime.unboxToChar((Object) parts._1());
        final char vowel = BoxesRunTime.unboxToChar((Object) parts._2());
        final char coda = BoxesRunTime.unboxToChar((Object) parts._3());

        // Everything before the last two characters, plus the second-to-last syllable recomposed
        // with the default coda argument.
        final StringBuilder stem = new StringBuilder()
                .append((Object) chunk2.subSequence(0, length - 2))
                .append(Hangul$.MODULE$.composeHangul(onset, vowel, Hangul$.MODULE$.composeHangul$default$3()));

        if (coda != '\u3134') {
            return chunk2;
        }
        if (finalChar != '\ub370' && finalChar != '\uac00' && finalChar != '\uc9c0') {
            return chunk2;
        }
        if (!((CharArraySet) KoreanDictionaryProvider$.MODULE$.koreanDictionary().apply((Object) KoreanPos$.MODULE$.Noun())).contains((CharSequence) stem)) {
            return chunk2;
        }

        final String mid;
        if (vowel == '\u3161') {
            mid = "\uc740";
        } else {
            mid = "\uc778";
        }
        return stem.toString() + mid + finalChar;
    }

    /**
     * Rewrites one match of {@code KOREAN_TO_NORMALIZE_REGEX}.
     *
     * <p>Group 1 of the match is kept as it is when the Noun dictionary contains it, or when the
     * Eomi dictionary contains its last one or last two characters. Otherwise it is passed
     * through {@code normalizeEmotionAttachedChunk}. Group 2 is appended unchanged in both cases.
     *
     * @param m a match whose groups 1 and 2 are read
     * @return the possibly rewritten group 1 followed by group 2
     */
    public CharSequence com$twitter$penguin$korean$v1$normalizer$KoreanNormalizer$$processNormalizationCandidate(Regex.Match m) {
        String chunk2 = m.group(1);
        String toNormalize = m.group(2);

        // At most the last one / two characters, like takeRight(1) / takeRight(2).
        String lastOne = chunk2.substring(Math.max(0, chunk2.length() - 1));
        String lastTwo = chunk2.substring(Math.max(0, chunk2.length() - 2));

        // Dictionary lookups stay lazy and in the original order.
        boolean keepAsIs =
                ((CharArraySet) KoreanDictionaryProvider$.MODULE$.koreanDictionary().apply((Object) KoreanPos$.MODULE$.Noun())).contains(chunk2)
                || ((CharArraySet) KoreanDictionaryProvider$.MODULE$.koreanDictionary().apply((Object) KoreanPos$.MODULE$.Eomi())).contains((CharSequence) lastOne)
                || ((CharArraySet) KoreanDictionaryProvider$.MODULE$.koreanDictionary().apply((Object) KoreanPos$.MODULE$.Eomi())).contains((CharSequence) lastTwo);

        // normalizeEmotionAttachedChunk returns a CharSequence that is not always a String, so the
        // local must not be typed as String.
        CharSequence normalizedChunk = keepAsIs ? chunk2 : this.normalizeEmotionAttachedChunk(chunk2, toNormalize);
        return String.valueOf((Object) normalizedChunk) + toNormalize;
    }

    /**
     * Undoes the merge of a trailing character run into the last syllable(s) of {@code s}.
     *
     * <p>Let {@code init} be {@code s} without its last character, and let (onset, vowel, coda)
     * be the decomposition of the last character of {@code s}:
     * <ul>
     *   <li>If the coda is U+314B or U+314E, the result is {@code init} followed by the last
     *       syllable recomposed with the default coda argument.</li>
     *   <li>If the coda is {@code ' '} and all of the following hold, the result is {@code init}
     *       without its last character, followed by that character recomposed with the onset of
     *       the last character of {@code s} as its coda:
     *       <ul>
     *         <li>the last character of {@code init} also decomposes to a coda of {@code ' '};</li>
     *         <li>the vowel equals the first character of {@code toNormalize};</li>
     *         <li>the onset is a key of {@code Hangul.CODA_MAP}.</li>
     *       </ul></li>
     *   <li>In every other case {@code s} is returned unchanged.</li>
     * </ul>
     * NOTE(review): a coda of {@code ' '} presumably means "no coda" - confirm against
     * {@code Hangul.decomposeHangul}.
     *
     * @param s           non-empty chunk of text (its last character is always read)
     * @param toNormalize the character run that followed {@code s}; its first character is read
     * @return the rewritten chunk, or {@code s} itself when no rule applies
     * @throws MatchError if decomposing the last character of {@code init} yields {@code null}
     */
    private CharSequence normalizeEmotionAttachedChunk(CharSequence s, CharSequence toNormalize) {
        CharSequence init = s.subSequence(0, s.length() - 1);

        // Decomposition of the second-to-last character, kept only when its coda is ' '.
        // null stands for "absent"; a local typed None$ cannot hold a Some.
        Tuple3<Object, Object, Object> secondToLastDecomposed = null;
        if (init != null && init.length() > 0) {
            Tuple3<Object, Object, Object> previous = Hangul$.MODULE$.decomposeHangul(init.charAt(init.length() - 1));
            if (previous == null) {
                throw new MatchError(previous);
            }
            if (BoxesRunTime.unboxToChar((Object) previous._3()) == ' ') {
                secondToLastDecomposed = previous;
            }
        }

        Tuple3<Object, Object, Object> lastDecomposed = Hangul$.MODULE$.decomposeHangul(s.charAt(s.length() - 1));
        if (lastDecomposed == null) {
            return s;
        }
        char o = BoxesRunTime.unboxToChar((Object) lastDecomposed._1());
        char v = BoxesRunTime.unboxToChar((Object) lastDecomposed._2());
        char c = BoxesRunTime.unboxToChar((Object) lastDecomposed._3());

        // Coda U+314B or U+314E: drop it from the last syllable.
        if (c == '\u314b' || c == '\u314e') {
            return new StringBuilder().append((Object) init).append(Hangul$.MODULE$.composeHangul(o, v, Hangul$.MODULE$.composeHangul$default$3()));
        }

        // Conditions are checked in the original order; the first failure returns s as is.
        if (c != ' '
                || secondToLastDecomposed == null
                || v != toNormalize.charAt(0)
                || !Hangul$.MODULE$.CODA_MAP().contains((Object) BoxesRunTime.boxToCharacter((char) o))) {
            return s;
        }

        char onset = BoxesRunTime.unboxToChar((Object) secondToLastDecomposed._1());
        char vowel = BoxesRunTime.unboxToChar((Object) secondToLastDecomposed._2());
        // Fold the last syllable's onset into the previous syllable as its coda.
        return new StringBuilder().append((Object) init.subSequence(0, init.length() - 1)).append(Hangul$.MODULE$.composeHangul(onset, vowel, o));
    }

    /**
     * Builds the single instance: publishes it through {@code MODULE$} and compiles the regular
     * expressions and the exception set used by the normalization methods.
     *
     * NOTE(review): assigning a {@code static final} field from a constructor is not valid Java
     * source; this is presumably how the decompiler rendered the initialisation of the Scala
     * singleton - confirm before trying to compile this file.
     */
    private KoreanNormalizer$() {
        // Published first, before any other field is set.
        MODULE$ = this;
        // One or more characters in U+3131..U+3163 or U+AC00..U+D7A3.
        this.EXTENTED_KOREAN_REGEX = new StringOps(Predef$.MODULE$.augmentString("([\u3131-\u3163\uac00-\ud7a3]+)")).r();
        // Group 1: run of U+AC00..U+D7A3; group 2: run of U+314B, run of U+314E, or run of U+3160/U+315C.
        this.KOREAN_TO_NORMALIZE_REGEX = new StringOps(Predef$.MODULE$.augmentString("([\uac00-\ud7a3]+)(\u314b+|\u314e+|[\u3160\u315c]+)")).r();
        // Any character occurring three or more times in a row, or two or more of U+3160/U+315C.
        this.REPEATING_CHAR_REGEX = new StringOps(Predef$.MODULE$.augmentString("(.)\\1{2,}|[\u3160\u315c]{2,}")).r();
        // Any two-character sequence occurring three or more times in a row.
        this.REPEATING_2CHAR_REGEX = new StringOps(Predef$.MODULE$.augmentString("(..)\\1{2,}")).r();
        // One or more whitespace characters.
        this.WHITESPACE_REGEX = new StringOps(Predef$.MODULE$.augmentString("\\s+")).r();
        // The characters of this literal, as a set; normalizeCodaN leaves a chunk untouched when
        // its second-to-last character is one of them.
        this.CODA_N_EXCPETION = new StringOps(Predef$.MODULE$.augmentString("\uc740\ub294\uc6b4\uc778\ud150\uadfc\ub978\ud508\ub2cc\ub4e0\ub358")).toSet();
    }
}

