/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.test.corpus;

import com.hankcs.hanlp.corpus.dictionary.DictionaryMaker;
import com.hankcs.hanlp.corpus.document.CorpusLoader;
import com.hankcs.hanlp.corpus.document.Document;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;
import junit.framework.TestCase;

/**
 * Exploratory tests and one-off corpus utilities built on {@link CorpusLoader}.
 *
 * <p>Most methods read from or write to hard-coded, machine-specific locations
 * (relative {@code data/...} folders and absolute paths on drive {@code D:}),
 * so they only do useful work on a machine that has those folders in place.
 * None of the methods makes assertions; they either print to standard output
 * or produce files.
 */
public class TestCorpusLoader
extends TestCase {
    /** Corpus folder, relative to the working directory, used by the basic loading tests. */
    private static final String SAMPLE_CORPUS_DIR = "data/2014";

    /** Absolute location of the source corpus used by the dictionary and dot-adjustment utilities. */
    private static final String RAW_CORPUS_DIR = "D:\\JavaProjects\\CorpusToolBox\\data\\2014";

    /** Folder (no trailing separator) that {@link #testAdjustDot()} writes into and later tests read from. */
    private static final String ADJUSTED_CORPUS_DIR = "D:\\Doc\\\u8bed\u6599\u5e93\\2014_hankcs";

    /** Output file produced by {@link #testCombineToTxt()}. */
    private static final String COMBINED_TXT_PATH = "D:\\Doc\\\u8bed\u6599\u5e93\\2014_cn.txt";

    /** Charset name used for every file written by this class. */
    private static final String OUTPUT_CHARSET = "UTF-8";

    /**
     * Hands {@link CorpusLoader#walk} an array of four no-op handler threads,
     * named "0" through "3", for the sample corpus folder.
     *
     * @throws Exception if loading the corpus fails
     */
    public void testMultiThread() throws Exception {
        CorpusLoader.HandlerThread[] handlerThreadArray = new CorpusLoader.HandlerThread[4];
        for (int i = 0; i < handlerThreadArray.length; ++i) {
            handlerThreadArray[i] = new CorpusLoader.HandlerThread(String.valueOf(i)){

                @Override
                public void handle(Document document) {
                    // Intentionally empty: only the loading itself is exercised.
                }
            };
        }
        CorpusLoader.walk(SAMPLE_CORPUS_DIR, handlerThreadArray);
    }

    /**
     * Hands {@link CorpusLoader#walk} a single no-op handler for the sample corpus folder.
     *
     * @throws Exception if loading the corpus fails
     */
    public void testSingleThread() throws Exception {
        CorpusLoader.walk(SAMPLE_CORPUS_DIR, new CorpusLoader.Handler(){

            @Override
            public void handle(Document document) {
                // Intentionally empty: only the loading itself is exercised.
            }
        });
    }

    /**
     * Writes every document handed over by {@link CorpusLoader#walk} into one
     * UTF-8 text file: each simple sentence becomes one line of word values,
     * every value followed by a single space, and each document is terminated
     * by an additional empty line.
     *
     * @throws Exception if the output file cannot be opened or closed, or if the walk fails
     */
    public void testCombineToTxt() throws Exception {
        final BufferedWriter bw = new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(COMBINED_TXT_PATH), OUTPUT_CHARSET));
        try {
            CorpusLoader.walk(ADJUSTED_CORPUS_DIR, new CorpusLoader.Handler(){

                @Override
                public void handle(Document document) {
                    try {
                        for (List<Word> sentence : document.getSimpleSentenceList()) {
                            for (IWord iWord : sentence) {
                                bw.write(iWord.getValue());
                                bw.write(' ');
                            }
                            bw.newLine();
                        }
                        bw.newLine();
                    }
                    catch (Exception e) {
                        // Best effort: a failure on one document is reported and the
                        // remaining documents are still processed.
                        e.printStackTrace();
                    }
                }
            });
        }
        finally {
            // Close (and thereby flush) the writer even when the walk fails part-way.
            bw.close();
        }
    }

    /**
     * Converts the sample corpus folder into a list of simple sentences and prints the first one.
     *
     * @throws Exception if the conversion fails or yields no sentence at all
     */
    public void testConvert2SimpleSentenceList() throws Exception {
        List<List<Word>> simpleSentenceList = CorpusLoader.convert2SimpleSentenceList(SAMPLE_CORPUS_DIR);
        System.out.println(simpleSentenceList.get(0));
    }

    /**
     * Collects every word whose label starts with {@code "nr"} and saves the
     * resulting dictionary as text under {@code data/dictionary/custom}.
     *
     * @throws Exception if the walk or the save fails
     */
    public void testMakePersonCustomDictionary() throws Exception {
        TestCorpusLoader.saveWordsWithLabelPrefix("nr", "data/dictionary/custom/\u4eba\u540d\u8bcd\u5178.txt");
    }

    /**
     * Collects every word whose label starts with {@code "nt"} and saves the
     * resulting dictionary as text under {@code data/dictionary/custom}.
     *
     * @throws Exception if the walk or the save fails
     */
    public void testMakeOrganizationCustomDictionary() throws Exception {
        TestCorpusLoader.saveWordsWithLabelPrefix("nt", "data/dictionary/custom/\u673a\u6784\u540d\u8bcd\u5178.txt");
    }

    /**
     * Adds to a fresh {@link DictionaryMaker} every word from the complex
     * sentences of the raw corpus whose label starts with the given prefix,
     * then saves the dictionary as text.
     *
     * @param labelPrefix prefix a word's label must start with to be collected
     * @param outputPath  path handed to {@link DictionaryMaker#saveTxtTo}
     * @throws Exception if the walk or the save fails
     */
    private static void saveWordsWithLabelPrefix(final String labelPrefix, String outputPath) throws Exception {
        final DictionaryMaker dictionaryMaker = new DictionaryMaker();
        CorpusLoader.walk(RAW_CORPUS_DIR, new CorpusLoader.Handler(){

            @Override
            public void handle(Document document) {
                for (List<IWord> wordList : document.getComplexSentenceList()) {
                    for (IWord word : wordList) {
                        if (word.getLabel().startsWith(labelPrefix)) {
                            dictionaryMaker.add(word);
                        }
                    }
                }
            }
        });
        dictionaryMaker.saveTxtTo(outputPath);
    }

    /**
     * Rewrites each document of the raw corpus into its own numbered UTF-8
     * file ({@code 1.txt}, {@code 2.txt}, ...) inside the adjusted-corpus folder.
     *
     * <p>A word whose value is longer than one character and begins with the
     * ideographic full stop (U+3002) is split in two: the full stop is written
     * with the label {@code w}, immediately followed by the remainder of the
     * value with the word's original label. Every other word is written via
     * its {@code toString()}. Each word is followed by a space, each non-empty
     * sentence ends with a line break, and empty sentences are skipped.
     *
     * @throws Exception if the walk fails
     */
    public void testAdjustDot() throws Exception {
        CorpusLoader.walk(RAW_CORPUS_DIR, new CorpusLoader.Handler(){
            /** Number of the most recently started output file; the first file is 1. */
            int id = 0;

            @Override
            public void handle(Document document) {
                try {
                    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(ADJUSTED_CORPUS_DIR + "\\" + ++this.id + ".txt"), OUTPUT_CHARSET));
                    try {
                        for (List<IWord> wordList : document.getComplexSentenceList()) {
                            if (wordList.isEmpty()) {
                                continue;
                            }
                            for (IWord word : wordList) {
                                String value = word.getValue();
                                if (value.length() > 1 && value.charAt(0) == '\u3002') {
                                    // Detach the leading full stop and tag it as punctuation.
                                    bw.write("\u3002/w");
                                    bw.write(value.substring(1));
                                    bw.write('/');
                                    bw.write(word.getLabel());
                                }
                                else {
                                    bw.write(word.toString());
                                }
                                bw.write(' ');
                            }
                            bw.newLine();
                        }
                    }
                    finally {
                        // Release the file handle even when a write fails mid-document.
                        bw.close();
                    }
                }
                catch (IOException e) {
                    // Also covers FileNotFoundException and UnsupportedEncodingException,
                    // both of which are IOException subclasses. Best effort: report the
                    // failure and let the walk continue with the next document.
                    e.printStackTrace();
                }
            }
        });
    }

    /**
     * Prints every complex sentence of every document in the adjusted-corpus folder.
     *
     * @throws Exception if the walk fails
     */
    public void testLoadMyCorpus() throws Exception {
        CorpusLoader.walk(ADJUSTED_CORPUS_DIR + "\\", new CorpusLoader.Handler(){

            @Override
            public void handle(Document document) {
                for (List<IWord> wordList : document.getComplexSentenceList()) {
                    System.out.println(wordList);
                }
            }
        });
    }

    /**
     * Prints every simple word in the adjusted-corpus folder whose value is
     * longer than one character and ends with an ASCII double quote.
     *
     * @throws Exception if the walk fails
     */
    public void testFindQuote() throws Exception {
        CorpusLoader.walk(ADJUSTED_CORPUS_DIR + "\\", new CorpusLoader.Handler(){

            @Override
            public void handle(Document document) {
                for (List<Word> wordList : document.getSimpleSentenceList()) {
                    for (Word word : wordList) {
                        if (word.value.length() > 1 && word.value.endsWith("\"")) {
                            System.out.println(word);
                        }
                    }
                }
            }
        });
    }
}

