package com.zzsn.crawler;


import com.zzsn.util.Constants;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Entry point that feeds search keywords to {@code WeixinSearchThread} workers.
 *
 * <p>The keyword file is read line by line, the lines are split into batches,
 * and each batch is handed to one worker running on a fixed-size thread pool.
 *
 * <p>NOTE(review): the original header described the job as "request the page
 * for each keyword and save the crawled content to a local database table";
 * that work is presumably done inside {@code WeixinSearchThread}, which is not
 * visible from this file.
 */
public class WeixinSearch {

    /** Maximum number of keywords handed to a single worker. */
    private static final int BATCH_SIZE = 5000;

    /** Number of threads in the worker pool. */
    private static final int POOL_SIZE = 1;

    /** Pause, in milliseconds, after each worker submission. */
    private static final long SUBMIT_DELAY_MS = 1000L;

    /**
     * Reads the keyword file located at {@code Constants.META_SEARCH_KEYWORDPATH}
     * (decoded as UTF-8) and processes every line as a keyword.
     *
     * @param args command-line arguments; currently ignored
     * @throws IOException declared for failures while reading the keyword file
     */
    public static void main(String[] args) throws IOException {
        String filepath = Constants.META_SEARCH_KEYWORDPATH;
        File f = new File(filepath);
        List<String> allLines = FileUtil.getFileLines(f, "utf-8");
        paser(allLines);
    }

    /**
     * Splits the keywords into batches, submits one {@code WeixinSearchThread}
     * per batch to a thread pool, and blocks until every submitted worker has
     * finished.
     *
     * <p>If the calling thread is interrupted, submission stops, running workers
     * are cancelled via {@code shutdownNow()}, and the interrupt flag is restored
     * before returning.
     *
     * @param keywords keywords to search for; {@code null} or empty means there
     *                 is nothing to do and the method returns immediately
     */
    public static void paser(List<String> keywords) {
        List<List<String>> batches = splitList(keywords, BATCH_SIZE);
        if (batches == null) {
            // splitList signals "nothing to split" with null; iterating it would NPE.
            System.out.println("No keywords to process.");
            return;
        }

        ExecutorService threadPool = Executors.newFixedThreadPool(POOL_SIZE);
        try {
            int index = 0;
            try {
                for (List<String> keywordList : batches) {
                    WeixinSearchThread worker = new WeixinSearchThread();
                    worker.setThreadId(index++);
                    worker.setKeywords(keywordList);
                    threadPool.execute(worker);
                    Thread.sleep(SUBMIT_DELAY_MS);
                }
            } catch (RuntimeException e) {
                // Best effort: stop submitting, but still wait for workers already queued.
                System.out.println("Failed to submit search worker " + index + ": " + e);
            }

            threadPool.shutdown();
            // Block until all queued workers are done; no upper bound on total time.
            while (!threadPool.awaitTermination(1, TimeUnit.MINUTES)) {
                // still running, keep waiting
            }
        } catch (InterruptedException e) {
            System.out.println("Interrupted while running search workers: " + e);
            threadPool.shutdownNow();
            // Preserve the interrupt for whoever called us.
            Thread.currentThread().interrupt();
        } finally {
            // No-op if already shut down; guarantees pool threads never outlive this call.
            threadPool.shutdown();
        }
    }

    /**
     * Splits a list into consecutive chunks of at most {@code len} elements.
     *
     * <p>The chunks are {@link List#subList(int, int)} views backed by
     * {@code list}, not copies. Every chunk except possibly the last holds
     * exactly {@code len} elements.
     *
     * @param list the list to split
     * @param len  maximum chunk size
     * @param <T>  element type
     * @return the chunks in original order, or {@code null} when {@code list} is
     *         {@code null} or empty, or {@code len} is less than 1
     */
    public static <T> List<List<T>> splitList(List<T> list, int len) {
        if (list == null || list.isEmpty() || len < 1) {
            return null;
        }
        List<List<T>> result = new ArrayList<List<T>>();
        int size = list.size();
        int start = 0;
        while (start < size) {
            // Clamp with (size - start) so a very large len cannot overflow int arithmetic.
            int end = start + Math.min(len, size - start);
            result.add(list.subList(start, end));
            start = end;
        }
        return result;
    }

}
