/*
 * Decompiled with CFR 0.152.
 */
package org.snu.ids.ha.ma;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import org.snu.ids.ha.ma.CharSetType;
import org.snu.ids.ha.ma.Token;
import org.snu.ids.ha.ma.TokenPattern;
import org.snu.ids.ha.util.Util;

/**
 * Splits raw text into {@link Token}s.
 *
 * <p>Tokenization runs in two passes: first every entry of
 * {@link #PREDEFINED_TOKEN_PATTERN} is matched against the text (matched spans
 * are blanked out so later patterns cannot re-match them), then the text is
 * scanned character by character and split wherever the character class
 * changes.
 */
public class Tokenizer {
    /**
     * Regular-expression based tokens that are recognised before the
     * character-class scan.
     *
     * <p>Order matters: patterns are applied in array order and each match is
     * masked out of the working buffer, so an earlier pattern takes precedence
     * over a later one for the same span of text.
     */
    public static final TokenPattern[] PREDEFINED_TOKEN_PATTERN = new TokenPattern[]{
        new TokenPattern("[a-zA-Z0-9]+[-][a-zA-Z0-9]+", CharSetType.COMBINED),
        new TokenPattern("(\u314b|\u3160|\u315c|\u314e){2,}", CharSetType.EMOTICON),
        new TokenPattern("(\\^){3,}", CharSetType.EMOTICON),
        new TokenPattern("[-]?[0-9]+([,][0-9]{3})*([.][0-9]+)?", CharSetType.NUMBER),
        new TokenPattern("[(][\\^]([.]|_|[-]|o|0|O|3|~|[ ])?[\\^][']?[)]", CharSetType.EMOTICON),
        new TokenPattern("[d][\\^]([.]|_|[-]|o|0|O|3|~|[ ])?[\\^][b]", CharSetType.EMOTICON),
        new TokenPattern("[\\^]([.]|_|[-]|o|0|O|3|~|[ ])?[\\^]([;]+|['\"avV\u3157])?", CharSetType.EMOTICON),
        new TokenPattern("[(];_;[)]", CharSetType.EMOTICON),
        new TokenPattern("[(]T[_.~oO\\^]?T[)]", CharSetType.EMOTICON),
        new TokenPattern("\u315c[_.]?\u315c", CharSetType.EMOTICON),
        new TokenPattern("\u3161[_.]?\u315c", CharSetType.EMOTICON),
        new TokenPattern("\u315c[_.]?\u3161", CharSetType.EMOTICON),
        new TokenPattern("\u3160[_.]?\u3160", CharSetType.EMOTICON),
        new TokenPattern("\u3161[_.]?\u3160", CharSetType.EMOTICON),
        new TokenPattern("\u3160[_.]?\u3161", CharSetType.EMOTICON),
        new TokenPattern("\u3160[_.]?\u315c", CharSetType.EMOTICON),
        new TokenPattern("\u315c[_.]?\u3160", CharSetType.EMOTICON),
        new TokenPattern("[(][-](_|[.])?[-]([;]+|[a\u3157])?[)](zzZ)?", CharSetType.EMOTICON),
        new TokenPattern("[-](_|[.])?[-]([;]+|[a\u3157]|(zzZ))?", CharSetType.EMOTICON),
        new TokenPattern("[\u3161](_|[.])?[\u3161]([;]+|[a\u3157]|(zzZ))?", CharSetType.EMOTICON),
        new TokenPattern("[(][>]([.]|_)?[<][)]", CharSetType.EMOTICON),
        new TokenPattern("[>]([.]|_)?[<]", CharSetType.EMOTICON),
        new TokenPattern("[(][>]([.]|_)?[>][)]", CharSetType.EMOTICON),
        new TokenPattern("[>]([.]|_)?[>]", CharSetType.EMOTICON),
        new TokenPattern("[(][\u00ac]([.]|_)?[\u00ac][)]", CharSetType.EMOTICON),
        new TokenPattern("[\u00ac]([.]|_)?[\u00ac]", CharSetType.EMOTICON),
        new TokenPattern("[(]'(_|[.])\\^[)]", CharSetType.EMOTICON),
        new TokenPattern("'(_|[.])\\^", CharSetType.EMOTICON),
        new TokenPattern("\\^(_|[.])[~]", CharSetType.EMOTICON),
        new TokenPattern("[~](_|[.])\\^", CharSetType.EMOTICON),
        new TokenPattern("[(][.][_][.][)]", CharSetType.EMOTICON),
        new TokenPattern("[(]['][_]['][)]", CharSetType.EMOTICON),
        new TokenPattern("[(][,][_][,][)]", CharSetType.EMOTICON),
        new TokenPattern("[(][X][_][X][)]", CharSetType.EMOTICON),
        new TokenPattern("[O][_.][o]", CharSetType.EMOTICON),
        new TokenPattern("[o][_.][O]", CharSetType.EMOTICON),
        new TokenPattern("m[(]_ _[)]m", CharSetType.EMOTICON)
    };

    /**
     * Tokenizes {@code string}.
     *
     * <p>Predefined patterns are extracted first; the remaining text is split
     * into runs of characters that share a {@link CharSetType}. Within
     * {@code SYMBOL} and {@code ETC} runs a new token additionally starts
     * whenever the character differs from the previous one, so only repeats of
     * the same character stay together.
     *
     * @param string text to tokenize
     * @return the tokens after {@link Collections#sort(List)} (i.e. in
     *         {@code Token}'s natural order), or {@code null} when
     *         {@code Util.valid(string)} is false
     */
    public static List<Token> tokenize(String string) {
        if (!Util.valid(string)) {
            return null;
        }
        List<Token> tkList = new ArrayList<Token>();

        // Pass 1: pull out predefined patterns. find() overwrites every match
        // with spaces in the working copy so that offsets stay aligned with
        // the original text while later patterns cannot see the span again.
        StringBuilder masked = new StringBuilder(string);
        for (TokenPattern tkptn : PREDEFINED_TOKEN_PATTERN) {
            tkList.addAll(Tokenizer.find(masked, tkptn));
        }

        int length = string.length();
        boolean[] coveredByPattern = Tokenizer.checkFound(length, tkList);

        // Pass 2: character-class scan over the masked copy.
        StringBuilder current = new StringBuilder();
        CharSetType currentType = CharSetType.ETC;
        char previousChar = '\u0000';
        int tokenStart = 0;
        for (int pos = 0; pos < length; pos++) {
            char ch = masked.charAt(pos);
            CharSetType previousType = currentType;
            // EMOTICON is used here purely as an "already emitted by pass 1"
            // marker, whatever type the matching pattern actually carried.
            currentType = coveredByPattern[pos] ? CharSetType.EMOTICON : Tokenizer.classify(ch);

            boolean typeChanged = previousType != currentType;
            // For pos > 0 the buffer is never empty and its last character is
            // previousChar, so comparing against previousChar is sufficient.
            boolean splitsOnNewChar = (currentType == CharSetType.ETC || currentType == CharSetType.SYMBOL)
                    && previousChar != ch;
            if (pos != 0 && (typeChanged || splitsOnNewChar)) {
                // Spans covered by a predefined pattern were already added in pass 1.
                if (previousType != CharSetType.EMOTICON) {
                    tkList.add(new Token(current.toString(), previousType, tokenStart));
                }
                tokenStart = pos;
                current.setLength(0);
            }
            current.append(ch);
            previousChar = ch;
        }

        // The trailing run is appended only if Util.valid accepts it
        // (presumably it rejects blank text, which is what a masked pattern
        // span looks like here -- confirm against Util).
        String tail = current.toString();
        if (Util.valid(tail)) {
            tkList.add(new Token(tail, currentType, tokenStart));
        }
        Collections.sort(tkList);
        return tkList;
    }

    /**
     * Maps a single character to its {@link CharSetType}.
     *
     * <p>Checks are ordered; the first matching category wins. In particular
     * ASCII letters, digits and whitespace are tested before the
     * {@code BASIC_LATIN} block, which would otherwise classify them as
     * {@code SYMBOL}.
     */
    private static CharSetType classify(char ch) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(ch);
        if (ub == Character.UnicodeBlock.HANGUL_SYLLABLES
                || ub == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO) {
            return CharSetType.HANGUL;
        }
        if (ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
            return CharSetType.HANMUN;
        }
        if (ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z') {
            return CharSetType.ENGLISH;
        }
        if (ch >= '0' && ch <= '9') {
            return CharSetType.NUMBER;
        }
        if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
            return CharSetType.SPACE;
        }
        if (ub == Character.UnicodeBlock.LETTERLIKE_SYMBOLS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
                || ub == Character.UnicodeBlock.BASIC_LATIN) {
            return CharSetType.SYMBOL;
        }
        return CharSetType.ETC;
    }

    /**
     * Collects every match of {@code tkptn} in {@code sb} as a token and
     * overwrites the matched characters in {@code sb} with spaces.
     *
     * @param sb    working copy of the text; modified in place
     * @param tkptn pattern to apply; may be {@code null}
     * @return the matched tokens in match order; empty (never {@code null})
     *         when there is no match or {@code tkptn} is {@code null}, so the
     *         result can be passed straight to {@code addAll}
     */
    private static List<Token> find(StringBuilder sb, TokenPattern tkptn) {
        List<Token> found = new ArrayList<Token>();
        if (tkptn == null) {
            return found;
        }
        Matcher matcher = tkptn.pattern.matcher(sb);
        while (matcher.find()) {
            int start = matcher.start();
            int end = matcher.end();
            found.add(new Token(sb.substring(start, end), tkptn.charSetType, start));
            // Mask the span; length is unchanged so the matcher and all
            // recorded offsets remain valid.
            for (int i = start; i < end; i++) {
                sb.setCharAt(i, ' ');
            }
        }
        return found;
    }

    /**
     * Builds a per-character flag array marking the positions covered by the
     * given tokens.
     *
     * @param strlen length of the text the token indices refer to
     * @param tkList tokens whose {@code index} and {@code string} length
     *               define the covered spans; {@code null} is treated as empty
     * @return array of length {@code strlen}; {@code true} where a token covers
     *         the position
     */
    private static boolean[] checkFound(int strlen, List<Token> tkList) {
        // A new boolean[] is already all-false.
        boolean[] covered = new boolean[strlen];
        if (tkList == null) {
            return covered;
        }
        for (Token tk : tkList) {
            int spanLength = tk.string.length();
            for (int offset = 0; offset < spanLength; offset++) {
                covered[tk.index + offset] = true;
            }
        }
        return covered;
    }
}

