/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.test.model;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
import com.hankcs.hanlp.corpus.document.CorpusLoader;
import com.hankcs.hanlp.corpus.document.Document;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.corpus.io.ByteArray;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.model.crf.CRFModel;
import com.hankcs.hanlp.model.crf.FeatureFunction;
import com.hankcs.hanlp.model.crf.FeatureTemplate;
import com.hankcs.hanlp.model.crf.Table;
import com.hankcs.hanlp.seg.CRF.CRFSegment;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
import junit.framework.TestCase;

/**
 * Exploratory JUnit 3 tests for the CRF model classes ({@link CRFModel},
 * {@link FeatureTemplate}, {@link Table}) and {@link CRFSegment}.
 *
 * <p>Most tests print their result to standard output instead of asserting on it, and
 * several read or write hard-coded local paths, so they only run on a machine that
 * provides those files.
 */
public class TestCRF extends TestCase {
    /** Separates the token column from the tag column in the generated corpus. */
    private static final char COLUMN_SEPARATOR = '\t';

    /** Terminates every corpus row; a row holding only this character ends a sentence. */
    private static final char ROW_TERMINATOR = '\n';

    /** Tag written for a token that occupies a whole row by itself. */
    private static final char TAG_SINGLE = 'S';

    /** Tag written for the first character of a multi-character word. */
    private static final char TAG_BEGIN = 'B';

    /** Tag written for the inner characters of a multi-character word. */
    private static final char TAG_MIDDLE = 'M';

    /** Tag written for the last character of a multi-character word. */
    private static final char TAG_END = 'E';

    /**
     * Builds a feature template from a template string and prints the parameter it
     * generates for row 0 of a three-row table.
     */
    public void testTemplate() throws Exception {
        FeatureTemplate featureTemplate = FeatureTemplate.create("U05:%x[-2,0]/%x[-1,0]/%x[0,0]");
        Table table = new Table();
        table.v = new String[][]{{"\u90a3", "S"}, {"\u97f3", "B"}, {"\u97f5", "E"}};
        // Kept as char[] so that println(char[]) prints the characters, not an object id.
        char[] parameter = featureTemplate.generateParameter(table, 0);
        System.out.println(parameter);
    }

    /**
     * Saves a feature template to {@code data/test/out.bin}, loads it back into a fresh
     * instance and prints the loaded template.
     */
    public void testTestLoadTemplate() throws Exception {
        String path = "data/test/out.bin";
        FeatureTemplate featureTemplate = FeatureTemplate.create("U05:%x[-2,0]/%x[-1,0]/%x[0,0]");
        DataOutputStream out = new DataOutputStream(new FileOutputStream(path));
        try {
            featureTemplate.save(out);
        } finally {
            // Close before reading the file back so the handle is released even on failure.
            out.close();
        }
        featureTemplate = new FeatureTemplate();
        featureTemplate.load(ByteArray.createByteArray(path));
        System.out.println(featureTemplate);
    }

    /**
     * Loads a text-format model from a hard-coded local path, tags a five-character
     * table whose tag column is filled with {@code "?"} placeholders, and prints it.
     */
    public void testLoadFromTxt() throws Exception {
        CRFModel model = CRFModel.loadTxt("D:\\Tools\\CRF++-0.58\\example\\seg_cn\\model.txt");
        Table table = new Table();
        table.v = new String[][]{{"\u5546", "?"}, {"\u54c1", "?"}, {"\u548c", "?"}, {"\u670d", "?"}, {"\u52a1", "?"}};
        model.tag(table);
        System.out.println(table);
    }

    /**
     * Loads a second text-format model from a hard-coded local path and prints it.
     */
    public void testLoadModelWhichHasNoB() throws Exception {
        CRFModel model = CRFModel.loadTxt("D:\\Tools\\CRF++-0.58\\example\\dependency\\model.txt");
        System.out.println(model);
    }

    /**
     * Segments a fixed Chinese sentence with {@link CRFSegment} in debug mode and prints
     * the result.
     */
    public void testSegment() throws Exception {
        HanLP.Config.enableDebug();
        CRFSegment segment = new CRFSegment();
        System.out.println(segment.seg("\u4e50\u89c6\u8d85\u7ea7\u624b\u673a\u80fd\u5426\u627f\u8f7d\u8d3e\u5e03\u65af\u7684\u751f\u6001\u68a6"));
    }

    /**
     * Walks a hard-coded corpus folder and writes every document to {@code e:\2014.txt}
     * (UTF-8) as tab-separated {@code token<TAB>tag} rows, one blank row after each
     * non-empty sentence.
     *
     * <p>The writer is closed even when the walk fails, and a write failure aborts the
     * test instead of being printed and ignored, so a truncated corpus is never mistaken
     * for a complete one.
     */
    public void testPrepareCRFTrainingCorpus() throws Exception {
        final BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("e:\\2014.txt"), "UTF-8"));
        try {
            CorpusLoader.walk("D:\\Doc\\\u8bed\u6599\u5e93\\2014_hankcs", new CorpusLoader.Handler() {
                @Override
                public void handle(Document document) {
                    try {
                        writeDocument(bw, document);
                    } catch (IOException e) {
                        // handle() as overridden here declares no checked exception,
                        // so wrap the failure and keep the original cause.
                        throw new RuntimeException("Failed to write CRF training corpus", e);
                    }
                }
            });
        } finally {
            bw.close();
        }
    }

    /**
     * Writes all sentences of {@code document} as corpus rows; empty sentences are
     * skipped and every written sentence is followed by a blank row.
     */
    private static void writeDocument(BufferedWriter bw, Document document) throws IOException {
        List<List<Word>> sentenceList = document.getSimpleSentenceList();
        for (List<Word> sentence : sentenceList) {
            if (sentence.isEmpty()) {
                continue;
            }
            for (IWord iWord : sentence) {
                writeWord(bw, iWord.getValue(), compile(iWord.getLabel()));
            }
            bw.write(ROW_TERMINATOR);
        }
    }

    /**
     * Writes one word. When {@code replacement} is non-null it is written instead of the
     * word as a single {@code S} row; a one-character word is also a single {@code S}
     * row; any longer word becomes one row per character tagged {@code B}, {@code M}...
     * and {@code E}.
     */
    private static void writeWord(BufferedWriter bw, String word, String replacement) throws IOException {
        if (replacement != null) {
            writeRow(bw, replacement, TAG_SINGLE);
            return;
        }
        if (word.isEmpty()) {
            // Nothing to emit; without this guard charAt(0) below would throw.
            return;
        }
        int last = word.length() - 1;
        if (last == 0) {
            writeRow(bw, word, TAG_SINGLE);
            return;
        }
        // NOTE: iterates UTF-16 code units, so a surrogate pair is split across rows.
        writeRow(bw, String.valueOf(word.charAt(0)), TAG_BEGIN);
        for (int i = 1; i < last; ++i) {
            writeRow(bw, String.valueOf(word.charAt(i)), TAG_MIDDLE);
        }
        writeRow(bw, String.valueOf(word.charAt(last)), TAG_END);
    }

    /** Writes a single {@code token<TAB>tag<LF>} row. */
    private static void writeRow(BufferedWriter bw, String token, char tag) throws IOException {
        bw.write(token);
        bw.write(COLUMN_SEPARATOR);
        bw.write(tag);
        bw.write(ROW_TERMINATOR);
    }

    /**
     * Segments a short text that mixes a decimal number with a Chinese character, in
     * debug mode, and prints the result.
     */
    public void testEnglishAndNumber() throws Exception {
        String text = "2.34\u7c73";
        HanLP.Config.enableDebug();
        CRFSegment segment = new CRFSegment();
        System.out.println(segment.seg(text));
    }

    /**
     * Maps a word label to the class symbol that replaces the word in the training
     * corpus.
     *
     * @param tag the word's label; may be {@code null}
     * @return {@code "M"} for a label starting with {@code "m"}, {@code "W"} for the
     *         labels {@code "x"} and {@code "nx"}, and {@code null} (keep the word
     *         itself) for any other label or a {@code null} label
     */
    public static String compile(String tag) {
        if (tag == null) {
            return null;
        }
        if (tag.startsWith("m")) {
            return "M";
        }
        if (tag.equals("x") || tag.equals("nx")) {
            return "W";
        }
        return null;
    }

    /**
     * Loads the binary segmentation model found at
     * {@code HanLP.Config.CRFSegmentModelPath + ".bin"}, tags a fixed sentence one
     * character per row and prints the resulting table.
     */
    public void testLoadModelWithBiGramFeature() throws Exception {
        String path = HanLP.Config.CRFSegmentModelPath + ".bin";
        CRFModel model = new CRFModel(new BinTrie<FeatureFunction>());
        model.load(ByteArray.createByteArray(path));
        Table table = new Table();
        String text = "\u4eba\u6c11\u751f\u6d3b\u8fdb\u4e00\u6b65\u6539\u5584\u4e86";
        // Column 0 holds the character; column 1 is left null before tagging.
        table.v = new String[text.length()][2];
        for (int i = 0; i < text.length(); ++i) {
            table.v[i][0] = String.valueOf(text.charAt(i));
        }
        model.tag(table);
        System.out.println(table);
    }

    /**
     * Copies {@code E:\2014.txt} to {@code E:\2014f.txt}, dropping leading empty lines
     * and every empty line that directly follows another empty line in the output.
     * Both files are closed even when the copy fails.
     */
    public void testRemoveSpace() throws Exception {
        String inputPath = "E:\\2014.txt";
        String outputPath = "E:\\2014f.txt";
        BufferedReader br = IOUtil.newBufferedReader(inputPath);
        try {
            BufferedWriter bw = IOUtil.newBufferedWriter(outputPath);
            try {
                String line;
                // Length of the last line written; starts at 0 so leading blanks are dropped.
                int preLength = 0;
                while ((line = br.readLine()) != null) {
                    if (preLength == 0 && line.length() == 0) {
                        continue;
                    }
                    bw.write(line);
                    bw.newLine();
                    preLength = line.length();
                }
            } finally {
                bw.close();
            }
        } finally {
            br.close();
        }
    }
}

