/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.test.corpus;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.dictionary.StringDictionary;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.dictionary.CoreBiGramTableDictionary;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.nr.JapanesePersonDictionary;
import com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment;
import com.hankcs.hanlp.utility.TextUtility;
import java.io.BufferedWriter;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.TreeSet;
import junit.framework.TestCase;

/**
 * Ad-hoc JUnit 3 "tests" that act as maintenance scripts for the Japanese
 * person-name dictionary: they merge raw word lists, derive the role
 * dictionary, and run a few smoke checks against the segmenter.
 *
 * <p>Several methods read from hard-coded absolute Windows paths and write
 * into {@code data/dictionary/person/}; they are meant to be run manually on
 * a machine that has those files, not as part of an automated suite.
 */
public class TestMakeJapaneseName
extends TestCase {
    /**
     * Merges several raw name lists into {@code data/dictionary/person/日本人名.txt}.
     *
     * <p>Each line is stripped of lower-case ASCII letters and CR/LF characters,
     * words already present in the core or custom dictionary are dropped, and
     * only names of 3 to 6 characters whose first 1, 2 or 3 characters appear in
     * the surname list ({@code 日本姓氏.txt}) are kept.
     *
     * @throws Exception if any of the input files cannot be read or the output
     *                   cannot be written
     */
    public void testCombine() throws Exception {
        String root = "D:\\JavaProjects\\SougouDownload\\data\\";
        String[] pathArray = new String[]{"\u65e5\u672c\u540d\u4eba\u5927\u5408\u96c6.txt", "\u65e5\u5267\u7535\u5f71\u52a8\u6f2b\u548c\u65e5\u672c\u660e\u661f.txt", "\u65e5\u672c\u5973\u4f18.txt", "\u65e5\u672cAV\u5973\u4f18(A\u7247)EXTEND\u7248.txt", "\u65e5\u672c\u5973\u4f18\u5927\u5168.txt"};
        TreeSet<String> wordSet = new TreeSet<String>();
        for (String fileName : pathArray) {
            for (String line : IOUtil.readLineList(root + fileName)) {
                // Drop pinyin-style lower-case letters and stray line terminators.
                String word = line.replaceAll("[a-z\r\n]", "");
                if (CoreDictionary.contains(word) || CustomDictionary.contains(word)) {
                    continue;
                }
                wordSet.add(word);
            }
        }
        TreeSet<String> firstNameSet = new TreeSet<String>(IOUtil.readLineList("data/dictionary/person/\u65e5\u672c\u59d3\u6c0f.txt"));
        Iterator<String> iterator = wordSet.iterator();
        while (iterator.hasNext()) {
            String name = iterator.next();
            boolean lengthOk = name.length() >= 3 && name.length() <= 6;
            if (!lengthOk || !hasKnownSurnamePrefix(name, firstNameSet)) {
                iterator.remove();
            }
        }
        IOUtil.saveCollectionToTxt(wordSet, "data/dictionary/person/\u65e5\u672c\u4eba\u540d.txt");
    }

    /**
     * Reports whether the first 1, 2 or 3 characters of {@code name} form an
     * entry of {@code surnames}. Prefix lengths longer than the name are skipped.
     */
    private static boolean hasKnownSurnamePrefix(String name, TreeSet<String> surnames) {
        for (int len = 1; len <= 3 && len <= name.length(); ++len) {
            if (surnames.contains(name.substring(0, len))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds {@code data/dictionary/person/nrj.txt} from the surname list and
     * the full-name list.
     *
     * <p>For every full name, the longest prefix (at most 3 characters, and
     * always leaving at least one character over) that is a known surname is
     * cut off and the remainder is added with tag {@code "m"}. Every surname is
     * then added with tag {@code "x"}.
     *
     * @throws Exception if an input file cannot be read
     */
    public void testMakeRoleDictionary() throws Exception {
        TreeSet<String> firstNameSet = new TreeSet<String>(IOUtil.readLineList("data/dictionary/person/\u65e5\u672c\u59d3\u6c0f.txt"));
        TreeSet<String> fullNameSet = new TreeSet<String>(IOUtil.readLineList("data/dictionary/person/\u65e5\u672c\u4eba\u540d.txt"));
        StringDictionary dictionary = new StringDictionary(" ");
        for (String fullName : fullNameSet) {
            // Try the longest candidate surname first; stop at the first hit.
            for (int i = Math.min(3, fullName.length() - 1); i > 0; --i) {
                if (firstNameSet.contains(fullName.substring(0, i))) {
                    dictionary.add(fullName.substring(i), "m");
                    break;
                }
            }
        }
        for (String firstName : firstNameSet) {
            dictionary.add(firstName, "x");
        }
        dictionary.save("data/dictionary/person/nrj.txt");
    }

    /**
     * Smoke check: segments a sample sentence with debug output enabled and
     * prints the result. Makes no assertions.
     */
    public void testRecognize() throws Exception {
        HanLP.Config.enableDebug();
        DijkstraSegment segment = new DijkstraSegment();
        System.out.println(segment.seg("\u6211\u53eb\u5927\u6749\u4e9a\u4f9d\u91cc"));
    }

    /**
     * Returns the longest run of leading characters shared by {@code a} and
     * {@code b}.
     *
     * <p>NOTE(review): despite the name, this compares from index 0 forward, so
     * the result is the longest common <em>prefix</em>, not suffix. The name is
     * kept unchanged; no caller is visible in this file.
     *
     * @param a first string, must not be {@code null}
     * @param b second string, must not be {@code null}
     * @return the shared leading characters, or {@code ""} if the first
     *         characters differ or either string is empty
     */
    private String getLongestSuffix(String a, String b) {
        int limit = Math.min(a.length(), b.length());
        int shared = 0;
        while (shared < limit && a.charAt(shared) == b.charAt(shared)) {
            ++shared;
        }
        return a.substring(0, shared);
    }

    /**
     * Imports a raw surname list into {@code data/dictionary/person/日本姓氏.txt}.
     *
     * <p>Each line is expanded to {@code "[c1, c2, ...]"} form before being
     * passed to the simplified-Chinese converter, and the brackets, commas and
     * spaces are stripped afterwards. Lines that are not entirely Chinese after
     * conversion are discarded.
     *
     * @throws Exception if the input cannot be read or the output cannot be written
     */
    public void testImport() throws Exception {
        TreeSet<String> set = new TreeSet<String>();
        for (String line : IOUtil.readLineList("D:\\Doc\\\u8bed\u6599\u5e93\\corpus-master\\\u65e5\u672c\u59d3\u6c0f.txt")) {
            // presumably the character-by-character layout keeps the converter
            // from applying multi-character phrase mappings — TODO confirm
            String converted = HanLP.convertToSimplifiedChinese(Arrays.toString(line.toCharArray()));
            String name = converted.replaceAll("[\\[\\], ]", "");
            if (TextUtility.isAllChinese(name)) {
                set.add(name);
            }
        }
        IOUtil.saveCollectionToTxt(set, "data/dictionary/person/\u65e5\u672c\u59d3\u6c0f.txt");
    }

    /**
     * Smoke check: prints the Japanese person dictionary entry for a sample key.
     */
    public void testLoadJapanese() throws Exception {
        System.out.println(JapanesePersonDictionary.get("\u592a\u90ce"));
    }

    /**
     * Smoke check: segments a sample sentence with Japanese name recognition
     * and debug output enabled, and prints the result. Makes no assertions.
     */
    public void testSeg() throws Exception {
        HanLP.Config.enableDebug();
        DijkstraSegment segment = new DijkstraSegment();
        segment.enableJapaneseNameRecognize(true);
        System.out.println(segment.seg("\u6797\u5fd7\u73b2\u4eae\u76f8\u7f51\u53cb:\u786e\u5b9a\u4e0d\u662f\u6ce2\u591a\u91ce\u7ed3\u8863\uff1f"));
    }

    /**
     * Writes every (x-entry, other-entry) pair of the Japanese person
     * dictionary that has a positive frequency in the core bigram table to
     * {@code <JapanesePersonDictionaryPath>.badcase.txt}, one
     * {@code "<x><m> A"} line per pair.
     *
     * <p>Dictionary lines are split on single whitespace characters; column 0
     * is the word and column 1 the tag. Lines with fewer than two columns are
     * skipped instead of failing with an index error.
     *
     * @throws Exception if the dictionary cannot be read or the report cannot
     *                   be written
     */
    public void testCountBadCase() throws Exception {
        LinkedList<String> xList = new LinkedList<String>();
        LinkedList<String> mList = new LinkedList<String>();
        IOUtil.LineIterator iterator = new IOUtil.LineIterator(HanLP.Config.JapanesePersonDictionaryPath);
        while (iterator.hasNext()) {
            String line = iterator.next();
            String[] args = line.split("\\s");
            if (args.length < 2) {
                // Blank or malformed line: nothing to classify.
                continue;
            }
            if ("x".equals(args[1])) {
                xList.add(args[0]);
            } else {
                mList.add(args[0]);
            }
        }
        // Open the report only after the input was read, and always close it,
        // so a failure part-way through does not leak the file handle.
        BufferedWriter bw = IOUtil.newBufferedWriter(HanLP.Config.JapanesePersonDictionaryPath + ".badcase.txt");
        try {
            for (String x : xList) {
                for (String m : mList) {
                    if (CoreBiGramTableDictionary.getBiFrequency(x, m) <= 0) {
                        continue;
                    }
                    bw.write(x + m + " A");
                    bw.newLine();
                }
            }
        } finally {
            bw.close();
        }
    }
}

