package com.zzsn.search.googleThread;

import com.google.gson.Gson;
import com.zzsn.utility.index.Constants;
import com.zzsn.utility.model.CatchWebByMetaSearch;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Google search detail crawler entry point.
 *
 * <ol>
 *   <li>Consumes messages from the Kafka topic
 *       {@code Constants.KAFKA_PRODUCT_GOOGLE_URLLIST_TOPIC}.</li>
 *   <li>Deserializes each message (JSON) into a {@link CatchWebByMetaSearch} and hands it to
 *       {@link DetailGoogleSearchThread#crawler()}.</li>
 * </ol>
 *
 * <p>NOTE(review): the original header said the crawled content is stored in a local database
 * table; that happens (if at all) inside {@code DetailGoogleSearchThread} and cannot be
 * confirmed from this file.
 */
public class DetailGoogleSearch {

    /** Reused JSON parser; avoids allocating a new {@link Gson} for every record. */
    private static final Gson GSON = new Gson();

    /** How long a single {@code poll} may block waiting for data; non-zero to avoid a busy spin. */
    private static final long POLL_TIMEOUT_MS = 1000L;

    /** Pause before a new consumer is created after the previous one failed. */
    private static final long RECONNECT_BACKOFF_MS = 5000L;

    /** Maximum number of keywords handed to one {@link WebGoogleSearchThread}. */
    private static final int KEYWORD_BATCH_SIZE = 5000;

    /**
     * Runs the consumer until the main thread is interrupted.
     *
     * <p>If polling or committing fails, the consumer is closed, the error is reported on
     * {@code System.err}, and after a short back-off a fresh consumer is subscribed to the
     * same topic. A failure while handling a single record does not stop consumption.
     *
     * @param args unused
     * @throws IOException kept for signature compatibility; not thrown by this method itself
     */
    public static void main(String[] args) throws IOException {
        while (!Thread.currentThread().isInterrupted()) {
            // try-with-resources guarantees the consumer (and its sockets) is closed on failure.
            try (KafkaConsumer<String, String> consumer = createConsumer()) {
                consumer.subscribe(Arrays.asList(Constants.KAFKA_PRODUCT_GOOGLE_URLLIST_TOPIC));
                consumeLoop(consumer);
            } catch (Exception e) {
                System.err.println("Kafka consumer failed, reconnecting in "
                        + RECONNECT_BACKOFF_MS + " ms: " + e);
                try {
                    Thread.sleep(RECONNECT_BACKOFF_MS);
                } catch (InterruptedException interrupted) {
                    // Restore the flag so the outer loop terminates.
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /**
     * Polls the given consumer and processes every returned record until the current thread
     * is interrupted or the consumer throws.
     *
     * @param consumer an already subscribed consumer
     */
    private static void consumeLoop(KafkaConsumer<String, String> consumer) {
        while (!Thread.currentThread().isInterrupted()) {
            ConsumerRecords<String, String> records = consumer.poll(POLL_TIMEOUT_MS);
            // Offsets are committed BEFORE processing (as in the original code): a record whose
            // processing fails is not redelivered.
            consumer.commitSync();
            for (ConsumerRecord<String, String> record : records) {
                handleRecord(record);
            }
        }
    }

    /**
     * Parses one record and runs the detail crawler for it. Any failure is reported and
     * swallowed so that the remaining records are still processed.
     *
     * @param record the Kafka record whose value is the JSON form of a {@link CatchWebByMetaSearch}
     */
    private static void handleRecord(ConsumerRecord<String, String> record) {
        try {
            CatchWebByMetaSearch catchWebByMetaSearch =
                    GSON.fromJson(record.value(), CatchWebByMetaSearch.class);
            if (catchWebByMetaSearch == null) {
                // Gson returns null for a null/empty payload; nothing to crawl.
                System.err.println("Skipping empty record at offset " + record.offset());
                return;
            }
            DetailGoogleSearchThread detailGoogleSearchThread = new DetailGoogleSearchThread();
            detailGoogleSearchThread.catchWebByMetaSearch = catchWebByMetaSearch;
            detailGoogleSearchThread.crawler();
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            System.err.println("Failed to process record at offset " + record.offset() + ": " + e);
        }
    }

    /**
     * Converts a {@code yyyy-MM-dd} date string into a Unix timestamp in seconds.
     *
     * <p>The date is interpreted in the JVM's default time zone, using
     * {@link SimpleDateFormat}'s default (lenient) parsing.
     *
     * @param s the date string, e.g. {@code "2021-03-15"}; may be {@code null}
     * @return the epoch seconds as a decimal string, or {@code ""} if {@code s} is
     *         {@code null} or cannot be parsed
     */
    public static String dateToStamp(String s) {
        if (s == null) {
            return "";
        }
        try {
            // SimpleDateFormat is not thread-safe, so a new instance is created per call.
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
            Date date = simpleDateFormat.parse(s);
            return String.valueOf(date.getTime() / 1000);
        } catch (ParseException ignored) {
            // Contract: an unparseable date yields the empty string.
            return "";
        }
    }

    /**
     * Splits the keywords into batches of at most {@value #KEYWORD_BATCH_SIZE}, runs one
     * {@link WebGoogleSearchThread} per batch on a single-thread pool, and blocks until all
     * submitted batches have finished.
     *
     * <p>If the calling thread is interrupted, running work is cancelled via
     * {@link ExecutorService#shutdownNow()} and the interrupt flag is restored.
     *
     * @param keywords  the keywords to search; {@code null} or empty means nothing is done
     * @param startTime passed unchanged to each worker via {@code setStartTime}
     * @param endTime   passed unchanged to each worker via {@code setEndTime}
     */
    public static void paser(List<String> keywords, String startTime, String endTime) {
        List<List<String>> batches = splitList(keywords, KEYWORD_BATCH_SIZE);
        if (batches == null) {
            return;
        }
        ExecutorService threadPool = Executors.newFixedThreadPool(1);
        try {
            int index = 0;
            for (List<String> keywordList : batches) {
                WebGoogleSearchThread worker = new WebGoogleSearchThread();
                worker.setThreadId(index++);
                worker.setKeywords(keywordList);
                worker.setStartTime(startTime);
                worker.setEndTime(endTime);

                threadPool.execute(worker);
                // Stagger submissions by one second, as the original code did.
                Thread.sleep(1000);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            System.out.println(e.getMessage());
        } finally {
            // No new tasks are accepted; already submitted ones keep running.
            threadPool.shutdown();
        }
        awaitCompletion(threadPool);
    }

    /**
     * Blocks until the (already shut down) pool has terminated, or cancels its tasks if the
     * waiting thread is interrupted.
     *
     * @param threadPool a pool on which {@code shutdown()} has been called
     */
    private static void awaitCompletion(ExecutorService threadPool) {
        try {
            while (!threadPool.awaitTermination(1, TimeUnit.SECONDS)) {
                // Still running; keep waiting.
            }
        } catch (InterruptedException e) {
            threadPool.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Splits a list into consecutive chunks of at most {@code len} elements.
     *
     * <p>The chunks are {@link List#subList(int, int)} views backed by {@code list}; they are
     * not independent copies.
     *
     * @param list the list to split
     * @param len  maximum chunk size
     * @param <T>  element type
     * @return the chunks in order, or {@code null} if {@code list} is {@code null} or empty
     *         or {@code len < 1} (kept for compatibility with existing callers)
     */
    public static <T> List<List<T>> splitList(List<T> list, int len) {
        if (list == null || list.isEmpty() || len < 1) {
            return null;
        }
        List<List<T>> result = new ArrayList<List<T>>();
        int size = list.size();
        int start = 0;
        while (start < size) {
            // Computed from the remaining count so that a very large len cannot overflow int.
            int end = start + Math.min(len, size - start);
            result.add(list.subList(start, end));
            start = end;
        }
        return result;
    }

    /**
     * Builds a Kafka consumer with String key/value deserializers, manual offset commits,
     * at most one record per poll and a one-hour {@code max.poll.interval.ms}.
     *
     * <p>Servers, group id and offset-reset policy come from {@link Constants}. The bootstrap
     * servers are printed to standard output.
     *
     * @return a new, not yet subscribed consumer; the caller is responsible for closing it
     */
    private static KafkaConsumer<String, String> createConsumer() {
        Properties properties = new Properties();
        System.out.println(Constants.KAFKA_CONSUMER_SERVERS);
        properties.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, Constants.KAFKA_CONSUMER_SERVERS);
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, Constants.KAFKA_CONSUMER_GROUP_ID);
        // Offsets are committed explicitly by the poll loop.
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        // Where to start reading when the group has no committed offset ("latest" / "earliest").
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, Constants.KAFKA_CONSUMER_AUTO_OFFSET_RESET);
        // Allow up to one hour between polls.
        properties.put("max.poll.interval.ms", 60 * 60 * 1000);
        // One record per poll.
        properties.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 1);
        return new KafkaConsumer<>(properties);
    }
}
