/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.test.corpus;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.dictionary.DictionaryMaker;
import com.hankcs.hanlp.corpus.dictionary.TFDictionary;
import com.hankcs.hanlp.corpus.dictionary.item.Item;
import com.hankcs.hanlp.corpus.document.CorpusLoader;
import com.hankcs.hanlp.corpus.document.Document;
import com.hankcs.hanlp.corpus.document.sentence.word.CompoundWord;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.occurrence.TermFrequency;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import junit.framework.TestCase;

/**
 * Maintenance routines for the core dictionary, written as
 * {@code junit.framework.TestCase} methods.
 *
 * <p>None of the methods assert anything: each one loads a dictionary or corpus from disk and
 * either prints selected entries to standard output or writes a dictionary file back out.
 * {@link #testSortCoreNatureDictionary()} and {@link #testRemoveNumber()} overwrite the file at
 * {@link #DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT} in place.
 */
public class TestAdjustCoreDictionary
extends TestCase {
    /** Path of the core dictionary, taken from {@code HanLP.Config.CoreDictionaryPath}. */
    public static final String DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT = HanLP.Config.CoreDictionaryPath;

    /**
     * Matches a word made of any character, the literal {@code ##}, and any character.
     * Compiled once so the dictionary scan does not recompile the regex for every entry.
     */
    private static final Pattern COMPILED_WORD_PATTERN = Pattern.compile(".##.");

    /**
     * Single characters dropped by {@link #testRemoveNumber()}: the ASCII digits followed by
     * Chinese numeral characters.
     */
    private static final String NUMBER_CHARACTERS = "0123456789\u96f6\u25cb\u3007\u4e00\u4e8c\u4e24\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u5eff\u767e\u5343\u4e07\u4ebf\u58f9\u8d30\u53c1\u8086\u4f0d\u9646\u67d2\u634c\u7396\u62fe\u4f70\u4edf";

    /**
     * Prints every item of {@code data/test/CoreNatureDictionary.txt} whose key matches
     * {@code .##.}.
     *
     * @throws Exception declared for whatever the dictionary loader may throw
     */
    public void testGetCompiledWordFromDictionary() throws Exception {
        DictionaryMaker dictionaryMaker = DictionaryMaker.load("data/test/CoreNatureDictionary.txt");
        for (Map.Entry<String, Item> entry : dictionaryMaker.entrySet()) {
            if (COMPILED_WORD_PATTERN.matcher(entry.getKey()).matches()) {
                System.out.println(entry.getValue());
            }
        }
    }

    /**
     * Prints every term frequency of {@code data/dictionary/CoreNatureDictionary.ngram.txt}
     * whose key contains {@code ##}.
     *
     * @throws Exception declared for whatever the dictionary loader may throw
     */
    public void testViewNGramDictionary() throws Exception {
        TFDictionary tfDictionary = new TFDictionary();
        tfDictionary.load("data/dictionary/CoreNatureDictionary.ngram.txt");
        // The entry must be typed: with a raw Map.Entry, getKey() yields Object and cannot be
        // assigned to a String.
        // NOTE(review): assumes TFDictionary.entrySet() is declared with
        // Map.Entry<String, TermFrequency> elements - confirm against TFDictionary.
        for (Map.Entry<String, TermFrequency> entry : tfDictionary.entrySet()) {
            String word = entry.getKey();
            TermFrequency frequency = entry.getValue();
            if (word.contains("##")) {
                System.out.println(frequency);
            }
        }
    }

    /**
     * Loads the core dictionary and writes it straight back to the same path.
     *
     * <p>NOTE(review): the method name suggests the save step emits entries in sorted order;
     * that depends on {@code DictionaryMaker} and is not visible here - confirm.
     *
     * @throws Exception declared for whatever loading or saving may throw
     */
    public void testSortCoreNatureDictionary() throws Exception {
        DictionaryMaker dictionaryMaker = DictionaryMaker.load(DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT);
        dictionaryMaker.saveTxtTo(DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT);
    }

    /**
     * Collects every compound word labelled {@code nz} from the corpus and saves the resulting
     * dictionary to {@code data/test/nz.txt}.
     *
     * <p>The corpus location is a hard-coded absolute Windows path, so this only does useful
     * work on a machine that has the corpus at that location.
     *
     * @throws Exception declared for whatever corpus walking or saving may throw
     */
    public void testSimplifyNZ() throws Exception {
        final DictionaryMaker nzDictionary = new DictionaryMaker();
        CorpusLoader.walk("D:\\Doc\\\u8bed\u6599\u5e93\\2014", new CorpusLoader.Handler(){

            /** Adds each compound word labelled {@code nz} in the document to the dictionary. */
            @Override
            public void handle(Document document) {
                for (List<IWord> sentence : document.getComplexSentenceList()) {
                    for (IWord word : sentence) {
                        if (word instanceof CompoundWord && "nz".equals(word.getLabel())) {
                            nzDictionary.add(word);
                        }
                    }
                }
            }
        });
        nzDictionary.saveTxtTo("data/test/nz.txt");
    }

    /**
     * Rewrites the core dictionary in place, rejecting every item whose key is a single
     * character listed in {@link #NUMBER_CHARACTERS}. Each rejected item is printed.
     *
     * @throws Exception declared for whatever loading or saving may throw
     */
    public void testRemoveNumber() throws Exception {
        DictionaryMaker dictionaryMaker = DictionaryMaker.load(DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT);
        dictionaryMaker.saveTxtTo(DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT, new DictionaryMaker.Filter(){

            /**
             * Returns {@code false} (after printing the item) for a one-character key found in
             * {@code NUMBER_CHARACTERS}, and {@code true} for every other item.
             */
            @Override
            public boolean onSave(Item item) {
                boolean singleNumberCharacter = item.key.length() == 1
                        && NUMBER_CHARACTERS.indexOf(item.key.charAt(0)) >= 0;
                if (singleNumberCharacter) {
                    System.out.println(item);
                    return false;
                }
                return true;
            }
        });
    }
}

