package com.xiaomi.ai.minmt.common;

import com.xiaomi.ai.nlp.lm.util.Constant;
import java.util.regex.Pattern;

/* loaded from: classes2.dex */
/**
 * Character-level tokenizer that returns its input as a single string in which
 * tokens are separated by whitespace.
 *
 * <p>Every character accepted by {@code Utils.isChineseChar} is surrounded by
 * {@code Constant.BLANK}; a fixed sequence of regex rewrites then splits ASCII
 * symbols and punctuation away from neighbouring text and normalises whitespace.
 *
 * <p>The regular expressions are compiled once per class load instead of on every
 * call. {@link Pattern} instances are immutable and safe to share; a fresh matcher
 * is created for each invocation.
 */
public class ZhCharTokenizer {
    // ASCII symbols that are always padded with a space on both sides. The class covers
    // the ranges '{'..'~', '['..'`', ' '..'&', '('..'+', ';'..'@' plus '/'.
    // Apostrophe, comma, hyphen, period and colon are NOT in the class; comma, period
    // and hyphen get the digit-aware rules below.
    private static final Pattern ASCII_SYMBOL = Pattern.compile("([{-~\\[-` -&(-+;-@/])");

    // A '.' or ',' that follows a non-digit: insert a space before it.
    private static final Pattern NON_DIGIT_THEN_PUNCT = Pattern.compile("([^0-9])([.,])");

    // A '.' or ',' that is followed by a non-digit: insert a space before the
    // punctuation and between it and the following character.
    private static final Pattern PUNCT_THEN_NON_DIGIT = Pattern.compile("([.,])([^0-9])");

    // A '-' directly after a digit: space it off on both sides.
    private static final Pattern DIGIT_THEN_DASH = Pattern.compile("([0-9])(-)");

    // Any run of whitespace, collapsed to Constant.BLANK.
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    // Whitespace at the very start or very end of the text, removed entirely.
    private static final Pattern EDGE_WHITESPACE = Pattern.compile("^\\s+|\\s+$");

    /**
     * Tokenizes {@code str} by isolating Chinese characters and ASCII symbols.
     *
     * <p>Because '.' and ',' are only split when a non-digit is adjacent, sequences
     * such as {@code 3.14} or {@code 1,000} are left intact.
     *
     * @param str the text to tokenize; may be {@code null}
     * @return the tokenized text with no leading or trailing whitespace, or
     *         {@code null} when {@code str} is {@code null}
     */
    public String tokenize(String str) {
        if (str == null) {
            return null;
        }

        // Pass 1: wrap every Chinese character in BLANK so each becomes its own token.
        // NOTE(review): Constant.BLANK is presumably a single space - confirm against
        // its declaration; it is also used below as a regex replacement string.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            if (Utils.isChineseChar(current)) {
                sb.append(Constant.BLANK).append(current).append(Constant.BLANK);
            } else {
                sb.append(current);
            }
        }

        // Pass 2: apply the rewrites in this exact order; later rules rely on the
        // spacing introduced by earlier ones, and the final two clean up the surplus
        // whitespace the earlier passes leave behind.
        String text = sb.toString();
        text = ASCII_SYMBOL.matcher(text).replaceAll(" $1 ");
        text = NON_DIGIT_THEN_PUNCT.matcher(text).replaceAll("$1 $2");
        text = PUNCT_THEN_NON_DIGIT.matcher(text).replaceAll(" $1 $2");
        text = DIGIT_THEN_DASH.matcher(text).replaceAll("$1 $2 ");
        text = WHITESPACE_RUN.matcher(text).replaceAll(Constant.BLANK);
        return EDGE_WHITESPACE.matcher(text).replaceAll("");
    }
}
