/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.test.corpus;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.dictionary.DictionaryMaker;
import com.hankcs.hanlp.corpus.dictionary.item.Item;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.BaseSearcher;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.Predefine;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import junit.framework.TestCase;

/**
 * Maintenance-style tests for the custom dictionary: most of them rewrite a
 * dictionary file on disk in place after filtering out unwanted entries, the
 * rest just print lookup results for manual inspection.
 *
 * <p>All paths are relative to the working directory the test is run from.
 */
public class TestCustomDictionary
extends TestCase {
    /**
     * Dictionary file rewritten by {@link #testRemoveShortWord()} and {@link #testRemoveNR()}.
     * NOTE(review): {@link #testRemoveJunkWord()} uses a different path
     * ("data/dictionary/custom/CustomDictionary.txt") - confirm which one is current.
     */
    private static final String CUSTOM_DICTIONARY_PATH = "data/dictionary/CustomDictionary.txt";

    /**
     * Charset used to read and write dictionary text files. Pinned explicitly so the
     * result does not depend on the platform default charset.
     * NOTE(review): assumes the dictionary files are stored as UTF-8 - confirm.
     */
    private static final String DICTIONARY_CHARSET = "UTF-8";

    /** Decides, from the whitespace-separated columns of one dictionary line, whether the line is kept. */
    private static interface LineFilter {
        /**
         * @param columns the line split on single whitespace characters; always has at least one element
         * @return {@code true} to keep the line, {@code false} to drop it
         */
        boolean keep(String[] columns);
    }

    /**
     * Rewrites a dictionary text file in place, keeping only the lines accepted by {@code filter}.
     *
     * <p>The whole file is read first, then overwritten. Kept lines are collected in a
     * {@link TreeSet}, so the output is sorted and free of duplicate lines. Blank lines are
     * dropped because they carry no entry. Streams are closed in {@code finally} blocks so a
     * failure while reading, filtering or writing does not leak file handles.
     *
     * @param path   file to filter and overwrite
     * @param filter predicate applied to the columns of every non-blank line
     * @throws Exception if the file cannot be read or written
     */
    private static void rewriteDictionary(String path, LineFilter filter) throws Exception {
        TreeSet<String> keptLines = new TreeSet<String>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), DICTIONARY_CHARSET));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                if (filter.keep(line.split("\\s"))) {
                    keptLines.add(line);
                }
            }
        } finally {
            reader.close();
        }
        // The reader is closed before the same file is opened for writing.
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), DICTIONARY_CHARSET));
        try {
            for (String keptLine : keptLines) {
                writer.write(keptLine);
                writer.newLine();
            }
        } finally {
            writer.close();
        }
    }

    /** Prints whatever the custom dictionary holds for a fixed sample word. */
    public void testGet() throws Exception {
        System.out.println(CustomDictionary.get("\u4e00\u4e2a\u5fc3\u773c\u513f"));
    }

    /**
     * Removes from the custom dictionary file every entry whose word is a single
     * character or is already contained in {@link CoreDictionary}.
     */
    public void testRemoveShortWord() throws Exception {
        rewriteDictionary(CUSTOM_DICTIONARY_PATH, new LineFilter(){

            @Override
            public boolean keep(String[] columns) {
                String word = columns[0];
                return word.length() != 1 && !CoreDictionary.contains(word);
            }
        });
    }

    /**
     * Removes from the custom dictionary file every entry whose second column is
     * exactly {@code "nr"}. Lines with only one column are kept unchanged.
     */
    public void testRemoveNR() throws Exception {
        rewriteDictionary(CUSTOM_DICTIONARY_PATH, new LineFilter(){

            @Override
            public boolean keep(String[] columns) {
                // Guard the index: a line without a second column cannot be an "nr" entry.
                return !(columns.length > 1 && "nr".equals(columns[1]));
            }
        });
    }

    /**
     * Walks a searcher over a fixed sample sentence and prints each hit as
     * "&lt;offset + 1&gt; &lt;entry&gt;" until the searcher returns {@code null}.
     */
    public void testNext() throws Exception {
        BaseSearcher searcher = CustomDictionary.getSearcher("\u90fd\u8981\u4eb2\u53e3");
        Map.Entry<?, ?> entry;
        while ((entry = searcher.next()) != null) {
            // The offset is read after next() so it belongs to the entry just returned.
            int offset = searcher.getOffset();
            System.out.println((offset + 1) + " " + entry);
        }
    }

    /**
     * Reloads the custom dictionary and saves it back to the same file, dropping
     * every item that carries any of the labels "mq", "m" or "t".
     */
    public void testRemoveJunkWord() throws Exception {
        String path = "data/dictionary/custom/CustomDictionary.txt";
        DictionaryMaker dictionaryMaker = DictionaryMaker.load(path);
        dictionaryMaker.saveTxtTo(path, new DictionaryMaker.Filter(){

            @Override
            public boolean onSave(Item item) {
                boolean junk = item.containsLabel("mq") || item.containsLabel("m") || item.containsLabel("t");
                return !junk;
            }
        });
    }

    /**
     * Filters a dictionary file in place. An item is always kept when its last
     * character is one of {@code Predefine.POSTFIX_SINGLE}; otherwise it is dropped
     * (and printed) when segmenting its key without the custom dictionary yields
     * exactly one term whose nature is {@code Nature.nr}.
     */
    public void testRemoveNotNS() throws Exception {
        String path = "data/dictionary/custom/\u5168\u56fd\u5730\u540d\u5927\u5168.txt";
        final TreeSet<Character> suffixSet = new TreeSet<Character>();
        for (char suffix : Predefine.POSTFIX_SINGLE.toCharArray()) {
            suffixSet.add(suffix);
        }
        DictionaryMaker.load(path).saveTxtTo(path, new DictionaryMaker.Filter(){
            // Custom dictionary is disabled so the entries being filtered cannot influence the segmentation.
            private final Segment segment = HanLP.newSegment().enableCustomDictionary(false);

            @Override
            public boolean onSave(Item item) {
                char lastChar = item.key.charAt(item.key.length() - 1);
                if (suffixSet.contains(lastChar)) {
                    return true;
                }
                List<Term> termList = this.segment.seg(item.key);
                boolean singleNrTerm = termList.size() == 1 && termList.get(0).nature == Nature.nr;
                if (singleNrTerm) {
                    // Log every rejected item so the removal can be reviewed.
                    System.out.println(item);
                    return false;
                }
                return true;
            }
        });
    }
}

