package com.xiaomi.ai.minmt.common;

import com.xiaomi.ai.nlp.lm.util.Constant;
import com.xiaomi.aiasst.vision.cloud.CloudConstants;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/* loaded from: classes2.dex */
/**
 * Rule-based detokenizer that joins whitespace-separated tokens back into running text,
 * following the left-shift / right-shift rules of the Moses detokenizer.
 *
 * <p>Note that the entry points are named {@code tokenize} even though they perform
 * detokenization; the names are kept for compatibility with existing callers.
 */
public class MosesDetokenizer {
    /** Currency symbols that attach to the token that follows them (e.g. {@code $ 5 -> $5}). */
    private static final String CURRENCY_SYMBOLS = "$¢£¤¥֏؋৲৳৻૱௹฿៛₠₡₢₣₤₥₦₧₨₩₪₫€₭₮₯₰₱₲₳₴₵₶₷₸₹₺₻₼₽꠸﷼﹩＄￠￡￥￦";

    /** Inclusive code point ranges that {@link #isCJK(int)} treats as CJK. */
    private static final int[][] CJK_RANGES = {
        {0x1100, 0x11FF},
        {0x2E80, 0xA4CF},
        {0xA840, 0xA87F},
        {0xF900, 0xFAFF},
        {0xFE30, 0xFE4F},
        {0xFF65, 0xFFDC},
        {0x17000, 0x18AFF},
        {0x1B000, 0x1B12F},
        {0x1B170, 0x1B2FF},
        {0x20000, 0x2FFFF},
    };

    /** Single-token substitutions applied before any other rule. */
    private static final Map<String, String> REPLACEMENTS = new HashMap<>();

    static {
        REPLACEMENTS.put("℃", "°C");
        REPLACEMENTS.put("℉", "°F");
    }

    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    /** A number, a degree sign and a C/F unit that were split apart by tokenization. */
    private static final Pattern DEGREE_UNIT = Pattern.compile("([0-9.]+)\\s+°\\s+([CF])\\s+");
    /** Korean particles that are glued to the preceding token. */
    private static final Pattern KOREAN_PARTICLE = Pattern.compile("[를을이가은는에로]|에게|으로");
    /** Currency symbols and opening punctuation: glued to the FOLLOWING token. */
    private static final Pattern OPENING = Pattern.compile("^[" + CURRENCY_SYMBOLS + "(\\[{¿¡]+$");
    /** Closing punctuation: glued to the PRECEDING token. */
    private static final Pattern CLOSING = Pattern.compile("^[,.?!:;\\\\%}\\])]+$");
    /** English clitics such as {@code 's}, {@code 're}, {@code 'll}. */
    private static final Pattern EN_CONTRACTION = Pattern.compile("^'[a-zA-Z].*");
    private static final Pattern QUOTES = Pattern.compile("^['\"„“`«»]+$");
    /** Typographic double quotes that share one open/close counter with {@code "}. */
    private static final Pattern TYPOGRAPHIC_DOUBLE_QUOTES = Pattern.compile("^[„“”]+$");

    private final Language language;

    /** Creates a detokenizer that applies the English-specific rules. */
    public MosesDetokenizer() {
        this.language = Language.EN;
    }

    /**
     * Creates a detokenizer for the given language.
     *
     * @param language language whose specific rules should be applied; only
     *                 {@code Language.EN} enables extra rules in this class
     */
    public MosesDetokenizer(Language language) {
        this.language = language;
    }

    /** Returns a fresh table of per-quote-character occurrence counters, all starting at zero. */
    private static Map<String, Integer> createQuoteCounts() {
        Map<String, Integer> counts = new HashMap<>();
        counts.put("'", 0);
        counts.put("\"", 0);
        counts.put("``", 0);
        counts.put("`", 0);
        counts.put("''", 0);
        counts.put("«", 0);
        return counts;
    }

    /**
     * Tells whether a Unicode code point lies in one of the {@link #CJK_RANGES}.
     *
     * <p>Works on code points rather than {@code char}s so that the ranges above U+FFFF
     * can actually match (a single {@code char} can never exceed 0xFFFF).
     *
     * @param codePoint the code point to test
     * @return {@code true} if the code point falls inside any configured range
     */
    private static boolean isCJK(int codePoint) {
        for (int[] range : CJK_RANGES) {
            if (codePoint >= range[0] && codePoint <= range[1]) {
                return true;
            }
        }
        return false;
    }

    /**
     * Detokenizes a whitespace-separated token string into natural text.
     *
     * <p>Rules, in priority order, for each token:
     * <ol>
     *   <li>{@code ℃}/{@code ℉} are replaced by {@code °C}/{@code °F};</li>
     *   <li>a token starting with a CJK character is glued to the previous token when that
     *       token ends with a CJK character;</li>
     *   <li>the danda {@code ।}, Korean particles and closing punctuation are glued to the
     *       previous token;</li>
     *   <li>currency symbols and opening punctuation are glued to the next token;</li>
     *   <li>for English, clitics such as {@code 's} are glued to the previous token;</li>
     *   <li>quotes alternate between opening (glued to the next token) and closing (glued to
     *       the previous token); for English a lone {@code '} after a word ending in
     *       {@code s} is treated as a possessive and glued to the previous token;</li>
     *   <li>any other token is preceded by the current separator.</li>
     * </ol>
     *
     * @param str tokens separated by whitespace; may be {@code null}
     * @return the detokenized text, or {@code str} itself when it is {@code null} or blank
     */
    public String tokenize(String str) {
        if (str == null || "".equals(str.trim())) {
            return str;
        }
        // Pad so that the " @-@ " and degree patterns also match at the very start/end.
        // NOTE(review): CloudConstants.WORD_SPLIT is presumably "-" (Moses hyphen marker) - confirm.
        String padded = (" " + str + " ").replace(" @-@ ", CloudConstants.WORD_SPLIT);
        String normalized = DEGREE_UNIT.matcher(padded).replaceAll("$1°$2 ");
        String[] tokens = WHITESPACE.split(normalized.trim());

        Map<String, Integer> quoteCounts = createQuoteCounts();
        StringBuilder out = new StringBuilder();
        // Separator to emit before the next token that is not left-shifted.
        // NOTE(review): Constant.BLANK is presumably a single space - confirm.
        String prependSpace = Constant.BLANK;

        for (int i = 0; i < tokens.length; i++) {
            String token = tokens[i];
            String previous = i > 0 ? tokens[i - 1] : null;
            // Most rules restore the normal separator; right-shifting rules clear it below.
            String nextPrependSpace = Constant.BLANK;

            String replacement = REPLACEMENTS.get(token);
            if (replacement != null) {
                out.append(prependSpace).append(replacement);
            } else if (isCJK(token.codePointAt(0))) {
                // Left shift only when this CJK token directly follows another CJK token.
                if (previous != null && isCJK(previous.codePointBefore(previous.length()))) {
                    out.append(token);
                } else {
                    out.append(prependSpace).append(token);
                }
            } else if ("।".equals(token) || KOREAN_PARTICLE.matcher(token).matches()) {
                out.append(token);
            } else if (OPENING.matcher(token).matches()) {
                // Right shift: no separator between this token and the one after it.
                out.append(prependSpace).append(token);
                nextPrependSpace = "";
            } else if (CLOSING.matcher(token).matches()) {
                out.append(token);
            } else if (this.language == Language.EN && i > 0 && EN_CONTRACTION.matcher(token).matches()) {
                out.append(token);
            } else if (QUOTES.matcher(token).matches()) {
                // Typographic double quotes count as '"', and '»' closes what '«' opened.
                String quoteKey = TYPOGRAPHIC_DOUBLE_QUOTES.matcher(token).matches() ? "\"" : token;
                if ("»".equals(token)) {
                    quoteKey = "«";
                }
                Integer seen = quoteCounts.get(quoteKey);
                int count = seen == null ? 0 : seen;
                if (count % 2 != 0) {
                    // Odd count so far: this is a closing quote, left shift.
                    out.append(token);
                    quoteCounts.put(quoteKey, count + 1);
                } else if (this.language == Language.EN && "'".equals(token)
                        && previous != null && previous.endsWith("s")) {
                    // Possessive after a word ending in "s": left shift, not counted as a quote.
                    out.append(token);
                } else {
                    // Opening quote: right shift.
                    out.append(prependSpace).append(token);
                    quoteCounts.put(quoteKey, count + 1);
                    nextPrependSpace = "";
                }
            } else {
                out.append(prependSpace).append(token);
            }
            prependSpace = nextPrependSpace;
        }
        return WHITESPACE.matcher(out).replaceAll(Constant.BLANK).trim();
    }

    /**
     * Detokenizes a list of tokens and splits the result again on {@code Constant.BLANK}.
     *
     * @param list tokens to join with {@code Constant.BLANK} and detokenize
     * @return a fixed-size list of the pieces of the detokenized text
     */
    public List<String> tokenize(List<String> list) {
        return Arrays.asList(tokenize(Utils.join(Constant.BLANK, list)).split(Constant.BLANK));
    }
}
