/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.test.corpus;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.dictionary.DictionaryMaker;
import com.hankcs.hanlp.corpus.dictionary.SimpleDictionary;
import com.hankcs.hanlp.corpus.dictionary.StringDictionary;
import com.hankcs.hanlp.corpus.dictionary.item.Item;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.BiGramDictionary;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.py.Pinyin;
import com.hankcs.hanlp.dictionary.py.PinyinDictionary;
import com.hankcs.hanlp.dictionary.py.TonePinyinString2PinyinConverter;
import com.hankcs.hanlp.dictionary.ts.TraditionalChineseDictionary;
import com.hankcs.hanlp.seg.NShort.NShortSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.utility.TextUtility;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import junit.framework.TestCase;

/**
 * One-off corpus tooling, written as JUnit 3 test methods, that builds pinyin,
 * part-of-speech and place-name dictionaries from external text resources.
 *
 * <p>Most methods read from hard-coded absolute paths on the original author's
 * machine and write into the project's {@code data/dictionary} tree, so they are
 * meant to be run by hand rather than as automated regression tests.
 */
public class TestXianDaiHanYu extends TestCase {
    /** Full text of the "Modern Chinese Dictionary (5th edition)" corpus file. */
    private static final String XIAN_DAI_HAN_YU_PATH = "D:\\Doc\\\u8bed\u6599\u5e93\\\u73b0\u4ee3\u6c49\u8bed\u8bcd\u5178\uff08\u7b2c\u4e94\u7248\uff09\u5168\u6587_\u66f4\u65b0.txt";

    /** Directory holding the downloaded Sougou cell-dictionary text exports. */
    private static final String SOUGOU_DATA_ROOT = "D:\\JavaProjects\\SougouDownload\\data\\";

    /**
     * Characters accepted inside a tone-marked pinyin string: lowercase ASCII
     * letters (without {@code v}), tone-marked vowels, and a few separator symbols.
     */
    private static final String PINYIN_CHARS = "abcdefghijklmnopqrstuwxyz\u0101\u00e1\u01ce\u00e0\u0113\u00e9\u011b\u00e8\u012b\u00ed\u01d0\u00ec\u014d\u00f3\u01d2\u00f2\u016b\u00fa\u01d4\u00f9\u01d6\u01d8\u01da\u01dc\u2225\u2022\u2019\uff0d";

    /** Matches a bracketed headword (group 1) immediately followed by its pinyin (group 2). */
    private static final Pattern WORD_PINYIN_PATTERN =
            Pattern.compile("\u3010([\\u4E00-\\u9FA5]+)\u3011([" + PINYIN_CHARS + "]+)");

    /**
     * Matches a 2-10 character bracketed headword (group 1), up to five arbitrary
     * characters, its pinyin (group 2) and the remainder of the line (group 3).
     */
    private static final Pattern WORD_PINYIN_GLOSS_PATTERN =
            Pattern.compile("\u3010([\\u4E00-\\u9FA5]{2,10})\u3011.{0,5}([" + PINYIN_CHARS + "]+)(.*)");

    /**
     * Extracts headword/pinyin pairs from the dictionary corpus and saves them to
     * {@code data/dictionary/pinyin/pinyin.xd.txt}. Entries whose pinyin cannot be
     * converted one-syllable-per-character are reported on stdout and skipped.
     */
    public void testMakeDictionary() throws Exception {
        // Locale.ROOT keeps the lowercasing independent of the JVM's default locale
        // (e.g. Turkish dotless i), which would otherwise break the ASCII character class.
        String text = IOUtil.readTxt(XIAN_DAI_HAN_YU_PATH).toLowerCase(Locale.ROOT);
        Matcher matcher = WORD_PINYIN_PATTERN.matcher(text);
        StringDictionary dictionary = new StringDictionary();
        while (matcher.find()) {
            String word = matcher.group(1);
            String pinyinString = matcher.group(2);
            List<Pinyin> pinyinList = TonePinyinString2PinyinConverter.convert(pinyinString, false);
            if (pinyinList.size() != word.length() || this.hasNull(pinyinList)) {
                // The message prefix means "conversion failed".
                System.out.println("\u8f6c\u6362\u5931\u8d25 " + word + " " + pinyinString + " " + pinyinList);
                continue;
            }
            dictionary.add(word, this.convertList2String(pinyinList));
        }
        System.out.println(dictionary.size());
        dictionary.save("data/dictionary/pinyin/pinyin.xd.txt");
    }

    /**
     * Converts the python-pinyin dictionary (word to comma-separated tone-marked
     * pinyin) and saves the result to {@code data/dictionary/pinyin/pinyin.python.txt}.
     * Entries that do not convert cleanly are printed and skipped.
     */
    public void testMakePyDictionary() throws Exception {
        StringDictionary dictionaryRaw = new StringDictionary();
        dictionaryRaw.load("D:\\PythonProjects\\python-pinyin\\dic.txt");
        StringDictionary dictionary = new StringDictionary();
        for (Map.Entry<String, String> entry : dictionaryRaw.entrySet()) {
            String word = entry.getKey();
            String[] pinyinArray = entry.getValue().split(",");
            List<Pinyin> pinyinList = TonePinyinString2PinyinConverter.convert(pinyinArray);
            if (word.length() != pinyinList.size() || this.hasNull(pinyinList)) {
                System.out.println(entry + " | " + pinyinList);
                continue;
            }
            dictionary.add(word, this.convertList2String(pinyinList));
        }
        dictionary.save("data/dictionary/pinyin/pinyin.python.txt");
    }

    /**
     * Merges the python-derived pinyin dictionary into the main pinyin dictionary
     * file. The filter asks for removal of every entry whose pinyin already equals
     * what {@link PinyinDictionary#convertToPinyin} produces; differing entries are
     * printed and kept.
     */
    public void testCombinePy() throws Exception {
        StringDictionary dictionary = new StringDictionary();
        dictionary.load("data/dictionary/pinyin/pinyin.python.txt");
        dictionary.remove(new SimpleDictionary.Filter<String>() {

            @Override
            public boolean remove(Map.Entry<String, String> entry) {
                String key = entry.getKey();
                String[] pinyinArray = entry.getValue().split(",");
                List<Pinyin> pinyinList = TonePinyinString2PinyinConverter.convertFromToneNumber(pinyinArray);
                List<Pinyin> localPinyinList = PinyinDictionary.convertToPinyin(key);
                if (!TestXianDaiHanYu.this.isEqual(pinyinList, localPinyinList)) {
                    // The message prefix means "accept".
                    System.out.println("\u63a5\u53d7 " + key + "=" + pinyinList + "!=" + localPinyinList);
                    return false;
                }
                return true;
            }
        });
        this.mergeIntoLocalPinyinDictionary(dictionary);
    }

    /**
     * Reads the KaiFang dictionary (whitespace-separated: an id column, the word,
     * then one tone-number pinyin per character), keeps the words whose pinyin
     * differs from the current {@link PinyinDictionary} output, and merges them
     * into the main pinyin dictionary file.
     */
    public void testMakeKaiFangDictionary() throws Exception {
        StringDictionary dictionaryKFTC = new StringDictionary();
        for (String line : IOUtil.readLineList("D:\\Doc\\\u8bed\u6599\u5e93\\cidian_zhzh-kfcd-2013122.txt")) {
            String[] args = line.split("\\s");
            // A usable line needs at least: id, word, one pinyin. Blank or truncated
            // lines would otherwise crash on a negative list capacity or args[1].
            if (args.length < 3) {
                continue;
            }
            String word = args[1];
            List<Pinyin> pinyinList = new ArrayList<Pinyin>(args.length - 2);
            for (int i = 2; i < args.length; ++i) {
                pinyinList.add(TonePinyinString2PinyinConverter.convertFromToneNumber(args[i]));
            }
            if (this.hasNull(pinyinList) || pinyinList.size() != word.length()) {
                continue;
            }
            List<Pinyin> localPinyinList = PinyinDictionary.convertToPinyin(word);
            if (this.isEqual(pinyinList, localPinyinList)) {
                continue;
            }
            // The message prefix means "accept".
            System.out.println("\u63a5\u53d7 " + word + "=" + pinyinList + "!=" + localPinyinList);
            dictionaryKFTC.add(word, this.convertList2String(pinyinList));
        }
        this.mergeIntoLocalPinyinDictionary(dictionaryKFTC);
    }

    /** Prints the pinyin that {@link PinyinDictionary} yields for a sample word. */
    public void testPinyin() throws Exception {
        System.out.println(PinyinDictionary.convertToPinyin("\u9f9f\u80cc"));
    }

    /**
     * Loads the dictionary at {@code HanLP.Config.PinyinDictionaryPath}, combines
     * {@code additions} into it and saves it back to the same path.
     */
    private void mergeIntoLocalPinyinDictionary(StringDictionary additions) throws Exception {
        StringDictionary dictionaryLocal = new StringDictionary();
        dictionaryLocal.load(HanLP.Config.PinyinDictionaryPath);
        dictionaryLocal.combine(additions);
        dictionaryLocal.save(HanLP.Config.PinyinDictionaryPath);
    }

    /**
     * Returns {@code true} when both lists have the same length and hold the same
     * element at every position.
     */
    private boolean isEqual(List<Pinyin> pinyinListA, List<Pinyin> pinyinListB) {
        if (pinyinListA.size() != pinyinListB.size()) {
            return false;
        }
        Iterator<Pinyin> iteratorA = pinyinListA.iterator();
        Iterator<Pinyin> iteratorB = pinyinListB.iterator();
        while (iteratorA.hasNext()) {
            // Identity comparison is null-safe; NOTE(review): it is only equivalent to
            // equals() if Pinyin is an enum - confirm.
            if (iteratorA.next() != iteratorB.next()) {
                return false;
            }
        }
        return true;
    }

    /** Prints the simplified form of a sample traditional-Chinese word. */
    public void testT2C() throws Exception {
        System.out.println(TraditionalChineseDictionary.convertToSimplifiedChinese("\u71b1\u7dda"));
    }

    /** Prints the conversion result for a single toneless syllable. */
    public void testConvertSingle() throws Exception {
        // The cast pins the println(Object) overload; the converter's return type is
        // not visible here, so it is kept deliberately.
        System.out.println((Object)TonePinyinString2PinyinConverter.convert("ai"));
    }

    /**
     * Joins the string forms of the given pinyin with commas.
     *
     * @param pinyinList pinyin to join; must not be {@code null}
     * @return the comma-separated string, or the empty string for an empty list
     */
    private String convertList2String(List<Pinyin> pinyinList) {
        StringBuilder sb = new StringBuilder();
        Iterator<Pinyin> iterator = pinyinList.iterator();
        while (iterator.hasNext()) {
            sb.append(iterator.next());
            // Separator only between elements, so an empty list is handled safely.
            if (iterator.hasNext()) {
                sb.append(',');
            }
        }
        return sb.toString();
    }

    /** Returns {@code true} if the list contains at least one {@code null} element. */
    private boolean hasNull(List<Pinyin> pinyinList) {
        for (Pinyin pinyin : pinyinList) {
            if (pinyin == null) {
                return true;
            }
        }
        return false;
    }

    /**
     * Prints, in sorted order and without separators, every distinct character
     * that occurs in the tone-marked form of any entry of {@code PinyinDictionary.pinyins}.
     */
    public void testEnumChar() throws Exception {
        TreeSet<Character> characterSet = new TreeSet<Character>();
        for (Pinyin pinyin : PinyinDictionary.pinyins) {
            for (char c : pinyin.getPinyinWithToneMark().toCharArray()) {
                characterSet.add(c);
            }
        }
        for (Character c : characterSet) {
            System.out.print(c);
        }
    }

    /** Prints the conversion result for a sample two-syllable tone-marked string. */
    public void testToken() throws Exception {
        System.out.println(TonePinyinString2PinyinConverter.convert("\u0101g\u014dng", true));
    }

    /**
     * Builds a supplementary part-of-speech dictionary from the dictionary corpus.
     * For every headword not already in the core or custom dictionary, the rest of
     * its line is scanned for Chinese part-of-speech markers; each marker found
     * adds the mapped nature label with its occurrence count. Words with no marker
     * get the {@code nz} label.
     */
    public void testMakeNatureDictionary() throws Exception {
        // Locale.ROOT keeps the lowercasing independent of the JVM's default locale.
        String text = IOUtil.readTxt(XIAN_DAI_HAN_YU_PATH).toLowerCase(Locale.ROOT);
        Matcher matcher = WORD_PINYIN_GLOSS_PATTERN.matcher(text);
        DictionaryMaker dictionaryMaker = new DictionaryMaker();
        dictionaryMaker.add("\u5e0c\u671b v 7685 vn 616");
        // Marker text found in the glosses mapped to the nature label:
        // noun, verb, adjective (two spellings), adverb.
        // NOTE(review): the one-character adjective marker is a prefix of the
        // two-character one, so both may count the same occurrence - confirm
        // whether that double counting is intended.
        Map<String, String> mapChineseToNature = new TreeMap<String, String>();
        mapChineseToNature.put("\u540d", Nature.n.toString());
        mapChineseToNature.put("\u52a8", Nature.v.toString());
        mapChineseToNature.put("\u5f62", Nature.a.toString());
        mapChineseToNature.put("\u526f", Nature.d.toString());
        mapChineseToNature.put("\u5f62\u5bb9", Nature.a.toString());
        while (matcher.find()) {
            String word = matcher.group(1);
            if (CoreDictionary.contains(word) || CustomDictionary.contains(word)) {
                continue;
            }
            String content = matcher.group(3);
            Item item = new Item(word);
            for (Map.Entry<String, String> entry : mapChineseToNature.entrySet()) {
                int frequency = TextUtility.count(entry.getKey(), content);
                if (frequency > 0) {
                    item.addLabel(entry.getValue(), frequency);
                }
            }
            if (item.getTotalFrequency() == 0) {
                item.addLabel(Nature.nz.toString());
            }
            dictionaryMaker.add(item);
        }
        dictionaryMaker.saveTxtTo("data/dictionary/custom/\u73b0\u4ee3\u6c49\u8bed\u8865\u5145\u8bcd\u5e93.txt");
    }

    /**
     * Collects all-Chinese place names that are not yet in the core or custom
     * dictionary from the nationwide place-name export and saves them, sorted.
     */
    public void testMakeCell() throws Exception {
        String[] pathArray = new String[]{"\u6700\u8be6\u7ec6\u7684\u5168\u56fd\u5730\u540d\u5927\u5168.txt"};
        TreeSet<String> wordSet = this.collectNewWords(SOUGOU_DATA_ROOT, pathArray, true);
        IOUtil.saveCollectionToTxt(wordSet, "data/dictionary/custom/\u5168\u56fd\u5730\u540d\u5927\u5168.txt");
    }

    /**
     * Collects Shanghai place, bus-line, bus-stop, road and metro-station names
     * that are not yet in the core or custom dictionary and saves them, sorted.
     */
    public void testMakeShanghaiCell() throws Exception {
        // NOTE(review): the second file name has no ".txt" suffix, unlike its
        // siblings - confirm against the actual files on disk.
        String[] pathArray = new String[]{"\u4e0a\u6d77\u5730\u540d\u8857\u9053\u540d.txt", "\u4e0a\u6d77\u516c\u4ea4\u7ebf\u8def\u540d", "\u4e0a\u6d77\u516c\u4ea4\u7ad9\u70b9.txt", "\u4e0a\u6d77\u5e02\u9053\u8def\u540d.txt", "\u4e0a\u6d77\u5e02\u5730\u94c1\u7ad9\u540d.txt"};
        TreeSet<String> wordSet = this.collectNewWords(SOUGOU_DATA_ROOT, pathArray, false);
        IOUtil.saveCollectionToTxt(wordSet, "data/dictionary/custom/\u4e0a\u6d77\u5730\u540d.txt");
    }

    /**
     * Reads every listed file under {@code root}, strips all whitespace from each
     * line, and gathers the lines that are in neither the core nor the custom
     * dictionary.
     *
     * @param root        directory prefix prepended verbatim to each file name
     * @param fileNames   files to read, one word per line
     * @param chineseOnly when {@code true}, lines failing
     *                    {@code TextUtility.isAllChinese} are dropped as well
     * @return the collected words in natural sort order
     */
    private TreeSet<String> collectNewWords(String root, String[] fileNames, boolean chineseOnly) throws Exception {
        TreeSet<String> wordSet = new TreeSet<String>();
        for (String fileName : fileNames) {
            for (String line : IOUtil.readLineList(root + fileName)) {
                String word = line.replaceAll("\\s", "");
                if (chineseOnly && !TextUtility.isAllChinese(word)) {
                    continue;
                }
                if (CoreDictionary.contains(word) || CustomDictionary.contains(word)) {
                    continue;
                }
                wordSet.add(word);
            }
        }
        return wordSet;
    }

    /**
     * Rewrites the nationwide place-name dictionary in place, keeping only the
     * lines that pass {@code TextUtility.isAllChinese}, sorted and de-duplicated.
     */
    public void testFixDiMing() throws Exception {
        String path = "data/dictionary/custom/\u5168\u56fd\u5730\u540d\u5927\u5168.txt";
        TreeSet<String> wordSet = new TreeSet<String>();
        for (String word : IOUtil.readLineList(path)) {
            if (TextUtility.isAllChinese(word)) {
                wordSet.add(word);
            }
        }
        IOUtil.saveCollectionToTxt(wordSet, path);
    }

    /** Segments a sample sentence with the N-shortest-path segmenter in debug mode. */
    public void testSeg() throws Exception {
        Segment segment = new NShortSegment().enableNameRecognize(true);
        HanLP.Config.enableDebug(true);
        System.out.println(segment.seg("\u6211\u5728\u533a\u4eba\u4fdd\u5de5\u4f5c"));
    }

    /** Prints the bigram frequency stored for a sample word pair. */
    public void testDebug() throws Exception {
        System.out.println(BiGramDictionary.getBiFrequency("\u4fdd@\u5de5\u4f5c"));
    }
}

