/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.test.seg;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
import com.hankcs.hanlp.dictionary.CoreBiGramTableDictionary;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.dictionary.other.CharType;
import com.hankcs.hanlp.seg.CRF.CRFSegment;
import com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment;
import com.hankcs.hanlp.seg.Other.CommonAhoCorasickSegmentUtil;
import com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.Viterbi.ViterbiSegment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.seg.common.wrapper.SegmentWrapper;
import com.hankcs.hanlp.tokenizer.BasicTokenizer;
import com.hankcs.hanlp.tokenizer.IndexTokenizer;
import com.hankcs.hanlp.tokenizer.NLPTokenizer;
import com.hankcs.hanlp.tokenizer.NotionalTokenizer;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import com.hankcs.hanlp.tokenizer.TraditionalChineseTokenizer;
import java.io.BufferedReader;
import java.io.StringReader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.TreeMap;
import junit.framework.TestCase;

/**
 * Smoke tests for the HanLP segmenters and tokenizers.
 *
 * <p>Most tests only print the segmentation result to standard output for manual
 * inspection; a handful make real assertions. Several tests flip switches on shared
 * static segmenters (for example {@code StandardTokenizer.SEGMENT}) or on
 * {@code HanLP.Config}, so tests that depend on such a switch set it explicitly
 * instead of relying on execution order.
 */
public class TestSegment extends TestCase {

    /** Prints the {@link DijkstraSegment} result for a sample sentence with debug output on. */
    public void testSeg() throws Exception {
        HanLP.Config.enableDebug();
        DijkstraSegment segment = new DijkstraSegment();
        System.out.println(segment.seg("\u6211\u4e2a\u4eba\u8ba4\u4e3a\u4e09\u4e2a\u4eba\u7684\u529b\u91cf\u5927\u4e8e1\u4e2a\u4eba\uff0c\u6240\u4ee5\u4e2a\u4eba\u5e94\u8be5\u670d\u4ece\u96c6\u4f53"));
    }

    /**
     * Adds a word to the custom dictionary and prints the segmentation of a sentence
     * containing it. Despite the method name, the segmenter used is {@link DijkstraSegment}.
     */
    public void testViterbi() throws Exception {
        HanLP.Config.enableDebug(true);
        CustomDictionary.add("\u7f51\u5267");
        DijkstraSegment segment = new DijkstraSegment();
        List<Term> termList = segment.seg("\u4f18\u9177\u603b\u88c1\u9b4f\u660e\u4ecb\u7ecd\u4e86\u4f18\u91772015\u5e74\u7684\u5185\u5bb9\u6218\u7565\uff0c\u8868\u793a\u8981\u4ee5\u201c\u5927\u7535\u5f71\u3001\u5927\u7f51\u5267\u3001\u5927\u7efc\u827a\u201d\u4e3a\u5173\u952e\u8bcd");
        System.out.println(termList);
    }

    /** Prints the {@link NotionalTokenizer} result for a sample sentence. */
    public void testNotional() throws Exception {
        System.out.println(NotionalTokenizer.segment("\u7b97\u6cd5\u53ef\u4ee5\u5bbd\u6cdb\u7684\u5206\u4e3a\u4e09\u7c7b"));
    }

    /** Prints the bigram frequency the core bigram table reports for a pair of characters. */
    public void testNGram() throws Exception {
        System.out.println(CoreBiGramTableDictionary.getBiFrequency("\u727a", "\u7272"));
    }

    /** Prints the {@link ViterbiSegment} result with all named-entity recognizers enabled. */
    public void testShortest() throws Exception {
        HanLP.Config.enableDebug();
        Segment segment = new ViterbiSegment().enableAllNamedEntityRecognize(true);
        System.out.println(segment.seg("\u628a\u5e02\u573a\u7ecf\u6d4e\u5949\u884c\u7684\u7b49\u4ef7\u4ea4\u6362\u539f\u5219\u5f15\u5165\u515a\u7684\u751f\u6d3b\u548c\u56fd\u5bb6\u673a\u5173\u653f\u52a1\u6d3b\u52a8\u4e2d"));
    }

    /** Prints the {@link IndexTokenizer} result for a sample phrase. */
    public void testIndexSeg() throws Exception {
        System.out.println(IndexTokenizer.segment("\u4e2d\u79d1\u9662\u9884\u6d4b\u79d1\u5b66\u7814\u7a76\u4e2d\u5fc3\u5b66\u672f\u59d4\u5458\u4f1a"));
    }

    /**
     * Drains a {@link SegmentWrapper} built over a two-line in-memory reader and prints
     * every term until {@code next()} returns {@code null}.
     */
    public void testWrapper() throws Exception {
        BufferedReader reader = new BufferedReader(new StringReader("\u4e2d\u79d1\u9662\u9884\u6d4b\u79d1\u5b66\u7814\u7a76\u4e2d\u5fc3\u5b66\u672f\u59d4\u5458\u4f1a\nhaha"));
        SegmentWrapper wrapper = new SegmentWrapper(reader, StandardTokenizer.SEGMENT);
        for (Term term = wrapper.next(); term != null; term = wrapper.next()) {
            System.out.println(term);
        }
    }

    /** Prints the same sentence segmented before and after enabling part-of-speech tagging. */
    public void testSpeechTagging() throws Exception {
        HanLP.Config.enableDebug();
        String text = "\u6559\u6388\u6b63\u5728\u6559\u6388\u81ea\u7136\u8bed\u8a00\u5904\u7406\u8bfe\u7a0b";
        DijkstraSegment segment = new DijkstraSegment();
        System.out.println("\u672a\u6807\u6ce8\uff1a" + segment.seg(text));
        segment.enablePartOfSpeechTagging(true);
        System.out.println("\u6807\u6ce8\u540e\uff1a" + segment.seg(text));
    }

    /** Verifies that the {@code HanLP.newSegment()} factory hands back a segmenter. */
    public void testFactory() throws Exception {
        // Previously the result was stored in an unused local and nothing was checked.
        assertNotNull(HanLP.newSegment());
    }

    /** Inserts a custom dictionary entry with an attribute string and prints how it segments. */
    public void testCustomDictionary() throws Exception {
        CustomDictionary.insert("\u80af\u5fb7\u57fa", "ns 1000");
        ViterbiSegment segment = new ViterbiSegment();
        System.out.println(segment.seg("\u80af\u5fb7\u57fa"));
    }

    /** Prints the {@link DijkstraSegment} result with organization recognition enabled. */
    public void testNT() throws Exception {
        HanLP.Config.enableDebug();
        Segment segment = new DijkstraSegment().enableOrganizationRecognize(true);
        System.out.println(segment.seg("\u5f20\u514b\u667a\u4e0e\u6f4d\u574a\u5730\u94c1\u5efa\u8bbe\u5de5\u7a0b\u516c\u53f8"));
    }

    /** Prints the {@link DoubleArrayTrieSegment} result with part-of-speech tagging enabled. */
    public void testACSegment() throws Exception {
        Segment segment = new DoubleArrayTrieSegment();
        segment.enablePartOfSpeechTagging(true);
        System.out.println(segment.seg("\u6c5f\u897f\u9131\u9633\u6e56\u5e72\u67af\uff0c\u4e2d\u56fd\u6700\u5927\u6de1\u6c34\u6e56\u53d8\u6210\u5927\u8349\u539f"));
    }

    /** Prints a mixed-case Latin string segmented before and after inserting a custom word. */
    public void testIssue2() throws Exception {
        String text = "BENQphone";
        System.out.println(HanLP.segment(text));
        CustomDictionary.insert("BENQ");
        System.out.println(HanLP.segment(text));
    }

    /**
     * Asserts the character type reported for {@code '*'} and prints how three
     * quantity expressions containing an asterisk are segmented.
     */
    public void testIssue3() throws Exception {
        assertEquals((byte) 6, (byte) CharType.get('*'));
        System.out.println(HanLP.segment("300g*2"));
        System.out.println(HanLP.segment("\uff13\uff10\uff10\uff47\uff0a\uff12"));
        System.out.println(HanLP.segment("\u9c7c300\u514b*2/\u7ec4"));
    }

    /**
     * Placeholder: only declares a sample input and invokes no segmenter, so it
     * passes vacuously.
     */
    public void testQuickAtomSegment() throws Exception {
        // Kept so the sample input survives for whoever wires the actual call back in.
        String text = "\u4f60\u597d1234abc Good\u4e00\u4e8c\u4e09\u56db3.14";
    }

    /**
     * Prints the {@link ViterbiSegment} result for mixed text with the custom dictionary
     * and all named-entity recognizers switched off.
     */
    public void testJP() throws Exception {
        String text = "\u660e\u59298.9\u4f60\u597dabc\u5bf9\u4e86";
        Segment segment = new ViterbiSegment().enableCustomDictionary(false).enableAllNamedEntityRecognize(false);
        System.out.println(segment.seg(text));
    }

    /**
     * Segments one short sentence a million times with a stripped-down
     * {@link ViterbiSegment} and prints the measured throughput in characters per second.
     */
    public void testSpeedOfSecondViterbi() throws Exception {
        String text = "\u738b\u603b\u548c\u5c0f\u4e3d\u7ed3\u5a5a\u4e86";
        Segment segment = new ViterbiSegment().enableAllNamedEntityRecognize(false).enableNameRecognize(false).enableCustomDictionary(false);
        System.out.println(segment.seg(text));
        long start = System.currentTimeMillis();
        int pressure = 1000000;
        for (int i = 0; i < pressure; ++i) {
            segment.seg(text);
        }
        double costTime = (double) (System.currentTimeMillis() - start) / 1000.0;
        System.out.printf("\u5206\u8bcd\u901f\u5ea6\uff1a%.2f\u5b57\u6bcf\u79d2", (double) (text.length() * pressure) / costTime);
    }

    /**
     * Enables number-quantifier recognition on the shared standard segmenter and prints
     * the segmentation of several sentences that start with a numeral.
     */
    public void testNumberAndQuantifier() throws Exception {
        StandardTokenizer.SEGMENT.enableNumberQuantifierRecognize(true);
        String[] testCase = new String[]{"\u5341\u4e5d\u5143\u5957\u9910\u5305\u62ec\u4ec0\u4e48", "\u4e5d\u5343\u4e5d\u767e\u4e5d\u5341\u4e5d\u6735\u73ab\u7470", "\u58f9\u4f70\u5757\u94b1\u90fd\u4e0d\u7ed9\u6211", "\uff19\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\u53ea\u8682\u8681"};
        for (String sentence : testCase) {
            System.out.println(StandardTokenizer.segment(sentence));
        }
    }

    /**
     * Enables number-quantifier recognition on the shared standard and index segmenters
     * and prints how each of them segments two sample sentences.
     */
    public void testIssue10() throws Exception {
        StandardTokenizer.SEGMENT.enableNumberQuantifierRecognize(true);
        IndexTokenizer.SEGMENT.enableNumberQuantifierRecognize(true);
        List<Term> termList = StandardTokenizer.segment("\u6b64\u5e10\u53f7\u6709\u6b20\u8d39\u4e1a\u52a1\u662f\u4ec0\u4e48");
        System.out.println(termList);
        termList = IndexTokenizer.segment("\u6b64\u5e10\u53f7\u6709\u6b20\u8d39\u4e1a\u52a1\u662f\u4ec0\u4e48");
        System.out.println(termList);
        termList = StandardTokenizer.segment("15307971214\u8bdd\u8d39\u8fd8\u6709\u591a\u5c11");
        System.out.println(termList);
        termList = IndexTokenizer.segment("15307971214\u8bdd\u8d39\u8fd8\u6709\u591a\u5c11");
        System.out.println(termList);
    }

    /**
     * Segments a large text once without and once with four worker threads on the
     * shared basic segmenter, prints both throughputs, and asserts that the two
     * results agree term by term (word, nature and offset).
     */
    public void testMultiThreading() throws Exception {
        Segment segment = BasicTokenizer.SEGMENT;
        String text = "\u6c5f\u897f\u9131\u9633\u6e56\u5e72\u67af\uff0c\u4e2d\u56fd\u6700\u5927\u6de1\u6c34\u6e56\u53d8\u6210\u5927\u8349\u539f\u3002";
        System.out.println(segment.seg(text));

        // Build the large input by repeating the sample sentence.
        int pressure = 100000;
        StringBuilder sbBigText = new StringBuilder(text.length() * pressure);
        for (int i = 0; i < pressure; ++i) {
            sbBigText.append(text);
        }
        text = sbBigText.toString();

        long start = System.currentTimeMillis();
        List<Term> termList1 = segment.seg(text);
        double costTime = (double) (System.currentTimeMillis() - start) / 1000.0;
        System.out.printf("\u5355\u7ebf\u7a0b\u5206\u8bcd\u901f\u5ea6\uff1a%.2f\u5b57\u6bcf\u79d2\n", (double) text.length() / costTime);

        segment.enableMultithreading(4);
        start = System.currentTimeMillis();
        List<Term> termList2 = segment.seg(text);
        costTime = (double) (System.currentTimeMillis() - start) / 1000.0;
        System.out.printf("\u56db\u7ebf\u7a0b\u5206\u8bcd\u901f\u5ea6\uff1a%.2f\u5b57\u6bcf\u79d2\n", (double) text.length() / costTime);

        // Equal sizes are asserted first, so advancing both iterators in lockstep is safe.
        assertEquals(termList1.size(), termList2.size());
        Iterator<Term> iterator1 = termList1.iterator();
        Iterator<Term> iterator2 = termList2.iterator();
        while (iterator1.hasNext()) {
            Term term1 = iterator1.next();
            Term term2 = iterator2.next();
            assertEquals(term1.word, term2.word);
            assertEquals((Object) term1.nature, (Object) term2.nature);
            assertEquals(term1.offset, term2.offset);
        }
    }

    /** Segments a very short text while requesting 100 threads and prints the result. */
    public void testTryToCrashSegment() throws Exception {
        String text = "\u5c1d\u8bd5\u73a9\u574f\u5206\u8bcd\u5668";
        Segment segment = new ViterbiSegment().enableMultithreading(100);
        System.out.println(segment.seg(text));
    }

    /** Prints the {@link CRFSegment} result for a sample sentence with debug output on. */
    public void testCRFSegment() throws Exception {
        HanLP.Config.enableDebug();
        CRFSegment segment = new CRFSegment();
        System.out.println(segment.seg("\u6709\u53e5\u8c1a\u8bed\u53eb\u505a\u4e00\u4e2a\u841d\u535c\u4e00\u4e2a\u5751\u513f"));
    }

    /**
     * Inserts a custom word that mixes CJK, a digit and a Latin letter, then prints how
     * several case, width and script variants of it are segmented.
     */
    public void testIssue16() throws Exception {
        CustomDictionary.insert("\u7231\u542c4g", "nz 1000");
        ViterbiSegment segment = new ViterbiSegment();
        System.out.println(segment.seg("\u7231\u542c4g"));
        System.out.println(segment.seg("\u7231\u542c4G"));
        System.out.println(segment.seg("\u7231\u542c\uff14G"));
        System.out.println(segment.seg("\u7231\u542c\uff14\uff27"));
        System.out.println(segment.seg("\u611b\u807d\uff14\uff27"));
    }

    /**
     * Prints character type and conversion results for a few special characters, asserts
     * that a space converts to itself, and prints a segmentation produced while
     * {@code HanLP.Config.Normalization} is switched on.
     */
    public void testIssuse17() throws Exception {
        System.out.println(CharType.get('\u0000'));
        System.out.println(CharType.get(' '));
        assertEquals((char) CharTable.convert(' '), ' ');
        System.out.println(CharTable.convert('\ufe57'));

        // Normalization is a global static switch: restore it afterwards so this test
        // does not change the configuration seen by tests that run later.
        boolean previousNormalization = HanLP.Config.Normalization;
        HanLP.Config.Normalization = true;
        try {
            System.out.println(StandardTokenizer.segment("\u53f7 "));
        } finally {
            HanLP.Config.Normalization = previousNormalization;
        }
    }

    /**
     * Asserts that the second term of a numeral+quantifier input carries the first
     * nature listed for that quantifier in the core dictionary, then prints a couple of
     * related segmentations, the last one with number-quantifier recognition enabled.
     */
    public void testIssue22() throws Exception {
        // Other tests in this class enable number-quantifier recognition on the shared
        // standard segmenter and never reset it. The index-1 lookup below assumes the
        // numeral and the quantifier come back as two separate terms, so switch the
        // option off explicitly rather than depending on test execution order.
        StandardTokenizer.SEGMENT.enableNumberQuantifierRecognize(false);

        CoreDictionary.Attribute attribute = CoreDictionary.get("\u5e74");
        System.out.println(attribute);
        List<Term> termList = StandardTokenizer.segment("\u4e09\u5e74");
        System.out.println(termList);
        assertEquals((Object) attribute.nature[0], (Object) termList.get(1).nature);
        System.out.println(StandardTokenizer.segment("\u4e09\u5143"));
        StandardTokenizer.SEGMENT.enableNumberQuantifierRecognize(true);
        System.out.println(StandardTokenizer.segment("\u4e09\u5e74"));
    }

    /**
     * Prints the result of a factory-created segmenter with named-entity and
     * number-quantifier recognition both enabled.
     */
    public void testIssue71() throws Exception {
        Segment segment = HanLP.newSegment()
                .enableAllNamedEntityRecognize(true)
                .enableNumberQuantifierRecognize(true);
        System.out.println(segment.seg("\u66fe\u5e7b\u60f3\u8fc7\uff0c\u82e5\u5e72\u5e74\u540e\u7684\u6211\u5c31\u662f\u8fd9\u4e2a\u6837\u5b50\u7684\u5417"));
    }

    /** Invokes {@link TraditionalChineseTokenizer} once and discards the result. */
    public void testTime() throws Exception {
        TraditionalChineseTokenizer.segment("\u8ba4\u53ef\u7a0b\u5ea6");
    }

    /**
     * Builds an Aho-Corasick double-array trie from a three-entry dictionary and prints
     * the segmentation {@link CommonAhoCorasickSegmentUtil} produces with it.
     */
    public void testBuildASimpleSegment() throws Exception {
        TreeMap<String, String> dictionary = new TreeMap<String, String>();
        dictionary.put("HanLP", "\u540d\u8bcd");
        dictionary.put("\u7279\u522b", "\u526f\u8bcd");
        dictionary.put("\u65b9\u4fbf", "\u5f62\u5bb9\u8bcd");
        AhoCorasickDoubleArrayTrie<String> acdat = new AhoCorasickDoubleArrayTrie<String>();
        acdat.build(dictionary);
        // Printed directly so no raw-typed local is needed for the returned list.
        System.out.println(CommonAhoCorasickSegmentUtil.segment("HanLP\u662f\u4e0d\u662f\u7279\u522b\u65b9\u4fbf\uff1f", acdat));
    }

    /**
     * Prints the {@link NLPTokenizer} result for a date-time expression with
     * number-quantifier recognition enabled on its shared segmenter.
     */
    public void testNLPSegment() throws Exception {
        String text = "2013\u5e744\u670827\u65e511\u65f654\u5206";
        NLPTokenizer.SEGMENT.enableNumberQuantifierRecognize(true);
        System.out.println(NLPTokenizer.segment(text));
    }

    /**
     * Inserts a custom word and prints the {@link TraditionalChineseTokenizer} result
     * for a sample sentence.
     */
    public void testTraditionalSegment() throws Exception {
        CustomDictionary.insert("\u4e49\u6d88\u4eba\u5458");
        String text = "\u57fa\u9686\u5e02\u9577\u6797\u53f3\u660c\u5c0d\u7fa9\u6d88\u4eba\u54e1\u9577\u671f\u5354\u52a9\u6d88\u9632\u6551\u707d\u5de5\u4f5c";
        System.out.println(TraditionalChineseTokenizer.segment(text));
    }
}

