/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.tokenizer;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.dictionary.ts.SimplifiedChineseDictionary;
import com.hankcs.hanlp.dictionary.ts.TraditionalChineseDictionary;
import com.hankcs.hanlp.seg.Other.CommonAhoCorasickSegmentUtil;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.ResultTerm;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.SentencesUtil;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/**
 * Static helpers that segment Traditional Chinese text.
 *
 * <p>Each sentence is first matched against {@link TraditionalChineseDictionary#trie};
 * the labels of the matches (presumably the Simplified Chinese forms -- confirm
 * against the dictionary) are concatenated and handed to {@link #SEGMENT}. The
 * resulting terms are then rewritten so that their {@code word} is Traditional
 * Chinese again.
 */
public class TraditionalChineseTokenizer {
    /**
     * Segmenter applied to the converted text. The field is public and non-final,
     * so callers can replace it with a differently configured segmenter.
     */
    public static Segment SEGMENT = HanLP.newSegment();

    /**
     * Segments one sentence.
     *
     * @param text the sentence to segment; must not be {@code null}
     * @return the terms produced by {@link #SEGMENT}, with {@code word} and
     *         {@code offset} rewritten; an empty list when {@code text} is empty
     */
    private static List<Term> segSentence(String text) {
        if (text.length() == 0) {
            return Collections.emptyList();
        }
        LinkedList<ResultTerm<String>> tsList = CommonAhoCorasickSegmentUtil.segment(text, TraditionalChineseDictionary.trie);
        StringBuilder convertedBuilder = new StringBuilder(text.length());
        // Stays true only while every dictionary label has the same length as the
        // word it replaces, i.e. the converted text lines up with the input
        // character by character.
        boolean sameLength = true;
        for (ResultTerm<String> tsTerm : tsList) {
            if (tsTerm.label == null) {
                // No dictionary label for this span: keep the original characters.
                tsTerm.label = tsTerm.word;
            } else if (tsTerm.label.length() != tsTerm.word.length()) {
                sameLength = false;
            }
            convertedBuilder.append(tsTerm.label);
        }
        List<Term> termList = SEGMENT.seg(convertedBuilder.toString());
        if (sameLength) {
            // Positions are identical in both texts, so each term's surface form
            // can be sliced straight out of the original input.
            int offset = 0;
            for (Term term : termList) {
                term.word = text.substring(offset, offset + term.length());
                term.offset = offset;
                offset += term.length();
            }
        } else {
            // Lengths differ: walk the terms and the dictionary hits side by side.
            // A term that coincides with a hit reuses the hit's original word;
            // every other term is converted through the dictionary.
            Iterator<ResultTerm<String>> tsIterator = tsList.iterator();
            // Safe: sameLength can only be false if the loop above saw an element.
            ResultTerm<String> tsTerm = tsIterator.next();
            // Running position within the converted text handed to SEGMENT.
            int offset = 0;
            for (Term term : termList) {
                term.offset = offset;
                // NOTE(review): offset counts characters of the converted text while
                // tsTerm.offset is whatever CommonAhoCorasickSegmentUtil recorded
                // (presumably a position in the original text) -- confirm that this
                // comparison is intended.
                // The hasNext() guard keeps an exhausted hit list from throwing
                // NoSuchElementException; such terms fall through to the
                // dictionary conversion below.
                if (offset > tsTerm.offset + tsTerm.word.length() && tsIterator.hasNext()) {
                    tsTerm = tsIterator.next();
                }
                if (offset == tsTerm.offset && term.length() == tsTerm.label.length()) {
                    term.word = tsTerm.word;
                } else {
                    term.word = SimplifiedChineseDictionary.convertToTraditionalChinese(term.word);
                }
                offset += term.length();
            }
        }
        return termList;
    }

    /**
     * Segments a text by splitting it with {@link SentencesUtil#toSentenceList}
     * and concatenating the terms of every sentence.
     *
     * @param text the text to segment
     * @return all terms of all sentences, in order
     */
    public static List<Term> segment(String text) {
        LinkedList<Term> termList = new LinkedList<Term>();
        for (String sentence : SentencesUtil.toSentenceList(text)) {
            termList.addAll(TraditionalChineseTokenizer.segSentence(sentence));
        }
        return termList;
    }

    /**
     * Segments a character array. The array is passed through
     * {@link CharTable#convert} and the result is delegated to the
     * {@code String} overload.
     *
     * @param text the characters to segment
     * @return all terms of the converted text
     */
    public static List<Term> segment(char[] text) {
        return TraditionalChineseTokenizer.segment(CharTable.convert(text));
    }

    /**
     * Segments a text sentence by sentence, keeping the sentence boundaries.
     *
     * @param text the text to segment
     * @return one term list per sentence returned by
     *         {@link SentencesUtil#toSentenceList}, in order
     */
    public static List<List<Term>> seg2sentence(String text) {
        LinkedList<List<Term>> resultList = new LinkedList<List<Term>>();
        for (String sentence : SentencesUtil.toSentenceList(text)) {
            resultList.add(TraditionalChineseTokenizer.segment(sentence));
        }
        return resultList;
    }
}

