/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.corpus.document;

import com.hankcs.hanlp.corpus.document.Document;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.corpus.io.FolderWalker;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.utility.Predefine;
import java.io.File;
import java.util.LinkedList;
import java.util.List;

/**
 * Static helpers for turning a folder of corpus text files into {@link Document}
 * objects, for flattening them into sentence lists, and for saving/loading those
 * structures through {@link IOUtil}.
 *
 * <p>Progress and timing information is printed to {@code System.out}.
 */
public class CorpusLoader {
    /**
     * Converts every file returned by {@code FolderWalker.open(folderPath)} into a
     * {@link Document} and passes each one to {@code handler}, sequentially on the
     * calling thread.
     *
     * @param folderPath folder handed to {@code FolderWalker.open}
     * @param handler    callback invoked once per converted document
     */
    public static void walk(String folderPath, Handler handler) {
        long start = System.currentTimeMillis();
        List<File> fileList = FolderWalker.open(folderPath);
        processFiles(fileList, handler);
        System.out.printf("\u82b1\u8d39\u65f6\u95f4%d ms\n", System.currentTimeMillis() - start);
    }

    /**
     * Splits the files returned by {@code FolderWalker.open(folderPath)} into
     * contiguous slices, assigns one slice to each thread, starts all threads and
     * waits for them to finish.
     *
     * <p>Each of the first {@code length - 1} threads receives
     * {@code fileCount / length} files (integer division); the last thread receives
     * everything that remains.
     *
     * <p>If the calling thread is interrupted while waiting, a warning is logged,
     * the remaining threads are still joined, and the interrupt status of the
     * calling thread is restored before this method returns.
     *
     * @param folderPath  folder handed to {@code FolderWalker.open}
     * @param threadArray not-yet-started worker threads; must contain at least one
     * @throws IllegalArgumentException if {@code threadArray} is null or empty
     */
    public static void walk(String folderPath, HandlerThread[] threadArray) {
        // Fail early with a clear message instead of an index -1 error after scanning the folder.
        if (threadArray == null || threadArray.length == 0) {
            throw new IllegalArgumentException("threadArray must contain at least one HandlerThread");
        }
        long start = System.currentTimeMillis();
        List<File> fileList = FolderWalker.open(folderPath);
        int threadCount = threadArray.length;
        int fileCount = fileList.size();
        int chunkSize = fileCount / threadCount;
        for (int i = 0; i < threadCount; ++i) {
            int from = chunkSize * i;
            // The last thread also takes the remainder left over by the integer division.
            int to = (i == threadCount - 1) ? fileCount : chunkSize * (i + 1);
            // subList returns a view of fileList; the slices do not overlap.
            threadArray[i].fileList = fileList.subList(from, to);
            threadArray[i].start();
        }
        boolean interrupted = false;
        for (HandlerThread handlerThread : threadArray) {
            try {
                handlerThread.join();
            }
            catch (InterruptedException e) {
                Predefine.logger.warning("\u591a\u7ebf\u7a0b\u5f02\u5e38" + e);
                // join() cleared the flag; remember it so the later joins still block normally.
                interrupted = true;
            }
        }
        System.out.printf("\u82b1\u8d39\u65f6\u95f4%d ms\n", System.currentTimeMillis() - start);
        if (interrupted) {
            // Restore the interrupt status so callers can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Converts every file returned by {@code FolderWalker.open(folderPath)} into a
     * {@link Document} and collects the results in file order.
     *
     * @param folderPath folder handed to {@code FolderWalker.open}
     * @return the converted documents, one per file
     */
    public static List<Document> convert2DocumentList(String folderPath) {
        long start = System.currentTimeMillis();
        List<File> fileList = FolderWalker.open(folderPath);
        LinkedList<Document> documentList = new LinkedList<Document>();
        int processed = 0;
        for (File file : fileList) {
            System.out.print(file);
            documentList.add(CorpusLoader.convert2Document(file));
            ++processed;
            System.out.println(" " + processed + " / " + fileList.size());
        }
        System.out.println(documentList.size());
        System.out.printf("\u82b1\u8d39\u65f6\u95f4%d ms\n", System.currentTimeMillis() - start);
        return documentList;
    }

    /**
     * Reads an object via {@code IOUtil.readObjectFrom(path)} and returns it as a
     * document list.
     *
     * <p>The cast is unchecked: the element type is not verified here.
     *
     * @param path location handed to {@code IOUtil.readObjectFrom}
     * @return the object read, viewed as {@code List<Document>}
     */
    public static List<Document> loadCorpus(String path) {
        // Generic type information is erased at runtime, so this cast cannot be checked.
        @SuppressWarnings("unchecked")
        List<Document> documentList = (List<Document>) IOUtil.readObjectFrom(path);
        return documentList;
    }

    /**
     * Writes {@code documentList} via {@code IOUtil.saveObjectTo}.
     *
     * @param documentList documents to store
     * @param path         location handed to {@code IOUtil.saveObjectTo}
     * @return the value returned by {@code IOUtil.saveObjectTo}
     */
    public static boolean saveCorpus(List<Document> documentList, String path) {
        return IOUtil.saveObjectTo(documentList, path);
    }

    /**
     * Reads an object via {@code IOUtil.readObjectFrom(path)} and returns it as a
     * list of sentences, each sentence being a list of {@link IWord}.
     *
     * <p>The cast is unchecked: the element type is not verified here.
     *
     * @param path location handed to {@code IOUtil.readObjectFrom}
     * @return the object read, viewed as {@code List<List<IWord>>}
     */
    public static List<List<IWord>> loadSentenceList(String path) {
        // Generic type information is erased at runtime, so this cast cannot be checked.
        @SuppressWarnings("unchecked")
        List<List<IWord>> sentenceList = (List<List<IWord>>) IOUtil.readObjectFrom(path);
        return sentenceList;
    }

    /**
     * Writes {@code sentenceList} via {@code IOUtil.saveObjectTo}.
     *
     * @param sentenceList sentences to store
     * @param path         location handed to {@code IOUtil.saveObjectTo}
     * @return the value returned by {@code IOUtil.saveObjectTo}
     */
    public static boolean saveSentenceList(List<List<IWord>> sentenceList, String path) {
        return IOUtil.saveObjectTo(sentenceList, path);
    }

    /**
     * Loads all documents under {@code path} and flattens them into one list that
     * holds the {@code wordList} of every sentence, in document order.
     *
     * @param path folder handed to {@link #convert2DocumentList(String)}
     * @return the word lists of all sentences of all documents
     */
    public static List<List<IWord>> convert2SentenceList(String path) {
        List<Document> documentList = CorpusLoader.convert2DocumentList(path);
        LinkedList<List<IWord>> sentenceList = new LinkedList<List<IWord>>();
        for (Document document : documentList) {
            for (Sentence sentence : document.sentenceList) {
                sentenceList.add(sentence.wordList);
            }
        }
        return sentenceList;
    }

    /**
     * Loads all documents under {@code path} and concatenates the result of
     * {@code Document.getSimpleSentenceList()} of each document, in document order.
     *
     * @param path folder handed to {@link #convert2DocumentList(String)}
     * @return the simple sentences of all documents
     */
    public static List<List<Word>> convert2SimpleSentenceList(String path) {
        List<Document> documentList = CorpusLoader.convert2DocumentList(path);
        LinkedList<List<Word>> sentenceList = new LinkedList<List<Word>>();
        for (Document document : documentList) {
            sentenceList.addAll(document.getSimpleSentenceList());
        }
        return sentenceList;
    }

    /**
     * Reads {@code file} with {@code IOUtil.readTxt} and builds a {@link Document}
     * from the text with {@code Document.create}.
     *
     * <p><b>Warning:</b> if {@code Document.create} returns {@code null}, the failing
     * file is logged and the whole JVM is terminated with exit status {@code -1}.
     * This also applies when the method runs on a worker thread.
     *
     * @param file corpus file to convert
     * @return the created document; never {@code null}, because the JVM exits first
     */
    public static Document convert2Document(File file) {
        Document document = Document.create(IOUtil.readTxt(file.getPath()));
        if (document == null) {
            // Leave a trace of which file triggered the shutdown before exiting.
            Predefine.logger.warning("Failed to create a Document from " + file.getPath() + ", exiting");
            System.exit(-1);
            return null;
        }
        return document;
    }

    /**
     * Shared progress loop: for each file, prints the file, converts it, prints
     * the running count, then hands the document to {@code handler}.
     */
    private static void processFiles(List<File> fileList, Handler handler) {
        int processed = 0;
        for (File file : fileList) {
            System.out.print(file);
            Document document = CorpusLoader.convert2Document(file);
            ++processed;
            System.out.println(" " + processed + " / " + fileList.size());
            handler.handle(document);
        }
    }

    /**
     * Callback that receives each document produced while walking a corpus.
     */
    public static interface Handler {
        /**
         * Processes one converted document.
         *
         * @param document the document to handle
         */
        public void handle(Document document);
    }

    /**
     * A worker thread that converts the files in {@link #fileList} and passes each
     * resulting document to its own {@link #handle(Document)} implementation.
     */
    public static abstract class HandlerThread
    extends Thread
    implements Handler {
        /**
         * Files this thread processes. It is assigned by
         * {@link CorpusLoader#walk(String, HandlerThread[])} before the thread is
         * started and must be set before {@link #run()} executes.
         */
        public List<File> fileList;

        /**
         * Creates a worker with the given thread name.
         *
         * @param name thread name, also used in the progress output
         */
        public HandlerThread(String name) {
            super(name);
        }

        /**
         * Processes every file in {@link #fileList} in order and prints start and
         * finish messages that include the thread name and the elapsed time.
         */
        @Override
        public void run() {
            long start = System.currentTimeMillis();
            System.out.printf("\u7ebf\u7a0b#%s \u5f00\u59cb\u8fd0\u884c\n", this.getName());
            CorpusLoader.processFiles(this.fileList, this);
            System.out.printf("\u7ebf\u7a0b#%s \u8fd0\u884c\u5b8c\u6bd5\uff0c\u8017\u65f6%dms\n", this.getName(), System.currentTimeMillis() - start);
        }
    }
}

