package com.samsung.nlepd.bert;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/* loaded from: classes2.dex */
/**
 * Splits whitespace-separated words into sub-word pieces using a greedy
 * longest-match-first lookup against a vocabulary map.
 *
 * <p>The first piece of a word is looked up as-is; every later piece is looked
 * up (and emitted) with the {@code "##"} continuation prefix. A word that
 * cannot be fully covered by vocabulary entries, or that is longer than
 * {@link #MAX_INPUTCHARS_PER_WORD} chars, is replaced by a single
 * {@code "[UNK]"} token.
 */
public final class WordpieceTokenizer {
    /** Words with more chars ({@link String#length()}) than this become {@code [UNK]}. */
    private static final int MAX_INPUTCHARS_PER_WORD = 200;
    /** Token emitted in place of a word that cannot be tokenized. */
    private static final String UNKNOWN_TOKEN = "[UNK]";
    /** Prefix attached to every piece that does not start its word. */
    private static final String CONTINUATION_PREFIX = "##";

    /** Vocabulary; only key membership is consulted by this class. */
    private final Map<String, Integer> dic;

    /**
     * Creates a tokenizer backed by the given vocabulary.
     *
     * @param map vocabulary whose keys are the known pieces; the map is stored
     *            by reference, not copied
     * @throws NullPointerException if {@code map} is null
     */
    public WordpieceTokenizer(Map<String, Integer> map) {
        // Fail fast here instead of with an anonymous NPE deep inside tokenize().
        this.dic = Objects.requireNonNull(map, "The vocabulary map is null.");
    }

    /**
     * Tokenizes the input into vocabulary pieces.
     *
     * <p>The input is first split by {@code BasicTokenizer.whitespaceTokenize};
     * each resulting word is then split independently.
     *
     * @param str text to tokenize
     * @return a new mutable list with the pieces of every word, in input order
     * @throws NullPointerException if {@code str} is null
     */
    public List<String> tokenize(String str) {
        Objects.requireNonNull(str, "The input String is null.");
        List<String> tokens = new ArrayList<>();
        for (String word : BasicTokenizer.whitespaceTokenize(str)) {
            boolean tooLong = word.length() > MAX_INPUTCHARS_PER_WORD;
            if (tooLong || !appendPieces(word, tokens)) {
                tokens.add(UNKNOWN_TOKEN);
            }
        }
        return tokens;
    }

    /**
     * Splits one word into pieces and appends them to {@code out}.
     *
     * <p>Nothing is appended unless the whole word can be covered, so a failed
     * word never leaves partial pieces behind.
     *
     * @param word single word to split; an empty word succeeds with no pieces
     * @param out  list receiving the pieces on success
     * @return {@code true} if the word was fully covered, {@code false} otherwise
     */
    private boolean appendPieces(String word, List<String> out) {
        List<String> pieces = new ArrayList<>();
        int start = 0;
        while (start < word.length()) {
            // Try the longest remaining substring first and shrink from the right.
            int end = word.length();
            String match = null;
            while (end > start) {
                String candidate = word.substring(start, end);
                if (start > 0) {
                    candidate = CONTINUATION_PREFIX + candidate;
                }
                if (dic.containsKey(candidate)) {
                    match = candidate;
                    break;
                }
                end--;
            }
            if (match == null) {
                // Not even a single char at this position is in the vocabulary.
                return false;
            }
            pieces.add(match);
            start = end;
        }
        out.addAll(pieces);
        return true;
    }
}
