/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.test.model;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.CorpusLoader;
import com.hankcs.hanlp.corpus.document.Document;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.corpus.io.ByteArray;
import com.hankcs.hanlp.model.trigram.CharacterBasedGenerativeModel;
import com.hankcs.hanlp.seg.HMM.HMMSegment;
import com.hankcs.hanlp.seg.common.Term;
import java.util.LinkedList;
import java.util.List;
import junit.framework.TestCase;

/**
 * Smoke tests for {@link CharacterBasedGenerativeModel} and {@link HMMSegment}.
 *
 * <p>None of the tests assert on results; each one runs the model and prints
 * what it produced to standard output so the result can be inspected by hand.
 */
public class TestCharacterBasedGenerativeModel
extends TestCase {
    /**
     * Feeds every sentence found under the corpus directory to a fresh model,
     * trains it, then tags a short sample string and prints the tag array.
     *
     * <p>NOTE(review): the corpus location is an absolute Windows path, so this
     * test presumably only works on a machine laid out like the author's.
     *
     * @throws Exception declared so any failure surfaces to the test runner
     */
    public void testTrainAndSegment() throws Exception {
        final CharacterBasedGenerativeModel trainee = new CharacterBasedGenerativeModel();
        CorpusLoader.Handler sentenceFeeder = new CorpusLoader.Handler(){

            @Override
            public void handle(Document document) {
                // Every simple sentence of the document becomes one training sample.
                for (List<Word> wordList : document.getSimpleSentenceList()) {
                    trainee.learn(wordList);
                }
            }
        };
        CorpusLoader.walk("D:\\JavaProjects\\HanLP\\data\\test\\cbgm", sentenceFeeder);
        trainee.train();

        String sample = "\u4e2d\u56fd\u9886\u571f";
        char[] tags = trainee.tag(sample.toCharArray());
        // Passing a char[] makes println emit the characters themselves.
        System.out.println(tags);
    }

    /**
     * Loads the model stored at {@code HanLP.Config.HMMSegmentModelPath}, tags a
     * sample sentence, turns the tag array into terms and prints both the tags
     * and the terms.
     *
     * @throws Exception declared so any failure surfaces to the test runner
     */
    public void testLoad() throws Exception {
        CharacterBasedGenerativeModel loaded = new CharacterBasedGenerativeModel();
        loaded.load(ByteArray.createByteArray(HanLP.Config.HMMSegmentModelPath));

        String sample = "\u6211\u5b9e\u73b0\u4e86\u4e00\u4e2a\u57fa\u4e8eCharacter Based TriGram\u7684\u5206\u8bcd\u5668";
        char[] chars = sample.toCharArray();
        char[] tags = loaded.tag(chars);
        List<String> terms = TestCharacterBasedGenerativeModel.collectTerms(chars, tags);

        System.out.println(tags);
        System.out.println(terms);
    }

    /**
     * Segments a sample sentence with a default {@link HMMSegment} and prints the
     * resulting term list, with {@code HanLP.Config.ShowTermNature} switched off.
     *
     * @throws Exception declared so any failure surfaces to the test runner
     */
    public void testSegment() throws Exception {
        HanLP.Config.ShowTermNature = false;
        String sample = "\u6211\u5b9e\u73b0\u4e86\u4e00\u4e2a\u57fa\u4e8eCharacter Based TriGram\u7684\u5206\u8bcd\u5668";
        List<Term> terms = new HMMSegment().seg(sample);
        System.out.println(terms);
    }

    /**
     * Cuts {@code sentence} into terms according to a per-character tag array.
     *
     * <p>A position tagged {@code 'b'} opens a term that runs through the next
     * position tagged {@code 'e'} (inclusive), or to the end of the tag array
     * when no {@code 'e'} follows. Every other position yields a
     * single-character term. Presumably {@code 'b'}/{@code 'e'} stand for
     * "begin"/"end" of a word; verify against the model's tag set.
     *
     * @param sentence the characters that were tagged
     * @param tag      one tag per character; assumed to be the same length as
     *                 {@code sentence} (not checked here)
     * @return the terms in order of appearance
     */
    private static List<String> collectTerms(char[] sentence, char[] tag) {
        List<String> terms = new LinkedList<String>();
        for (int cursor = 0; cursor < tag.length; ++cursor) {
            if (tag[cursor] != 'b') {
                terms.add(new String(sentence, cursor, 1));
                continue;
            }
            int start = cursor;
            // Scan forward to the closing 'e', stopping at the end of the array.
            while (cursor < tag.length && tag[cursor] != 'e') {
                ++cursor;
            }
            // Include the 'e' character itself when one was found.
            int endExclusive = cursor == tag.length ? cursor : cursor + 1;
            terms.add(new String(sentence, start, endExclusive - start));
        }
        return terms;
    }
}

