package com.zzsn.search.google;


import com.fasterxml.jackson.databind.ObjectMapper;
import com.zzsn.search.util.PublishDateUtil;
import com.zzsn.utility.index.Constants;
import com.zzsn.utility.model.CatchWebByMetaSearch;
import com.zzsn.utility.util.ChromeUtil;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.kafka.core.KafkaTemplate;

import java.util.ArrayList;
import java.util.List;

@Slf4j
public class GoogleRecorderUtil {

	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfGoogle(List<String> urlList, String charset, Long orgId, Long tid) {
		try {
			List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
			for (int i = 0; i < urlList.size(); i++) {
				Thread.sleep(1000*5);
				CatchWebByMetaSearch catchWebByMetaSearch = new CatchWebByMetaSearch();
				List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();
				Document doc = null;
				String docstr= ChromeUtil.getChromeDoc(urlList.get(i),((i)*20+180)+"");
				if(docstr==null){
					continue;
				}
				doc=Jsoup.parse(docstr);
				Elements firstElementsLink = doc.select("div[class=g]");
				//若果没有结果则不循环
				if(firstElementsLink.size()==0){
					break;
				}
				String info = doc.toString();
				for (int j = 0; j < firstElementsLink.size(); j++) {
					catchWebByMetaSearch= new CatchWebByMetaSearch();
					Elements e=firstElementsLink.get(j).select("h3");
					Elements a=firstElementsLink.get(j).select("a");
					Elements timespan=firstElementsLink.get(j).select("span[class=f]");
					System.out.println(e.get(0).text());
					System.out.println(a.get(0).attr("href"));
					catchWebByMetaSearch.setTid(tid);
					catchWebByMetaSearch.setSummary(urlList.get(i));
					catchWebByMetaSearch.setOrgId(orgId);
					catchWebByMetaSearch.setSourceaddress(a.get(0).attr("href"));
					catchWebByMetaSearch.setTitle(e.get(0).text());
					if(timespan.size()>0){
						System.out.println(timespan.get(0).text());
						if(timespan.get(0).text().contains("hours")) {
							catchWebByMetaSearch.setPublishDate(DatePaserUtil.getCreateDate());
						}else if(timespan.get(0).text().contains("day")) {
							int day=DatePaserUtil.getIntstr(timespan.get(0).text());

								catchWebByMetaSearch.setPublishDate(DatePaserUtil.getDateBeforesomdat(day));
							}
						else if(timespan.get(0).text().contains("ago")) {
							catchWebByMetaSearch.setPublishDate(DatePaserUtil.getCreateDate());
						}
						else{
							String date= PublishDateUtil.getPublishDate(timespan.get(0).text());
							System.out.println(date);
							catchWebByMetaSearch.setPublishDate(date);
						}
						System.out.println(catchWebByMetaSearch.getPublishDate());

					}
					metaSearchList.add(catchWebByMetaSearch);
				}
				catchWebByMetaSearchList.addAll(metaSearchList);
			}
			return catchWebByMetaSearchList;

		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}

	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> catchWebOfGoogleList(
			List<String> urlList, String charset, Long orgId, Long tid,KafkaTemplate kafkaTemplate) {
		try {
			List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
			for (int i = 0; i < urlList.size(); i++) {
				Thread.sleep(2000);
				CatchWebByMetaSearch catchWebByMetaSearch = new CatchWebByMetaSearch();
				List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();
				Document doc = null;
				System.out.println(urlList.get(i));
//				String docstr=ChromeUtil.getChromeDocnews(urlList.get(i),((i)*20)+"");
				String docstr=ChromeUtil.getChromeDoc(urlList.get(i));
				if(docstr==null){
					continue;
				}
				doc=Jsoup.parse(docstr);
				Elements firstElementsLink = doc.select("g-card[class=ftSUBd]");
				//若果没有结果则不循环
				if(firstElementsLink.size()==0){
					break;
				}
				for (int j = 0; j < firstElementsLink.size(); j++) {
					catchWebByMetaSearch= new CatchWebByMetaSearch();
					//标题
//					Elements e=firstElementsLink.get(j).select("div[class=mCBkyc tNxQIb y355M JIFdL JQe2Ld nDgy9d]");
					Elements e=firstElementsLink.get(j).select("div[class=\"mCBkyc y355M JQe2Ld nDgy9d\"]");
					//链接
					Elements a=firstElementsLink.get(j).select("a");
					//Elements timespan=firstElementsLink.get(j).select("span[class=WG9SHc]");
					System.out.println(e.get(0).text());
					System.out.println(a.get(0).attr("href"));
					catchWebByMetaSearch.setTid(tid);
					catchWebByMetaSearch.setSummary(urlList.get(i));
					//catchWebByMetaSearch.setOrgId(orgId);
					catchWebByMetaSearch.setSourceaddress(a.get(0).attr("href"));
					catchWebByMetaSearch.setTitle(e.get(0).text());
					//来源
					String origin=firstElementsLink.get(j).select("div[class=\"CEMjEf NUnG9d\"]").text();
					catchWebByMetaSearch.setSourcesite(origin);
					metaSearchList.add(catchWebByMetaSearch);
				}
				catchWebByMetaSearchList.addAll(metaSearchList);
			}
			for (CatchWebByMetaSearch catchWebByMetaSearch:catchWebByMetaSearchList){
                ObjectMapper mapper = new ObjectMapper();
                try {
                    String docjson = mapper.writeValueAsString(catchWebByMetaSearch);
                    kafkaTemplate.send(Constants.KAFKA_PRODUCT_GOOGLE_URLLIST_TOPIC, "key", docjson);
                    log.info("发送到kafka成功。");
                }catch (Exception e){
                   log.info(e.getMessage());
                }
            }
			return catchWebByMetaSearchList;

		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}

	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfGoogle1(
			List<String> urlList, String charset, Long orgId, Long tid) {
		try {
			List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
			for (int i = 0; i < urlList.size(); i++) {
				Thread.sleep(1000*5);
				CatchWebByMetaSearch catchWebByMetaSearch = new CatchWebByMetaSearch();
				List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();
				Document doc = null;
				System.out.println(urlList.get(i));
				String docstr=ChromeUtil.getChromeDocnews(urlList.get(i),((i)*20+0)+"");
				if(docstr==null){
					continue;
				}
				doc=Jsoup.parse(docstr);
				Elements firstElementsLink = doc.select("g-card[class=ftSUBd]");
				//若果没有结果则不循环
				if(firstElementsLink.size()==0){
					break;
				}
				String info = doc.toString();
				for (int j = 0; j < firstElementsLink.size(); j++) {
					catchWebByMetaSearch= new CatchWebByMetaSearch();
//					System.out.println(firstElementsLink.get(j).toString());
					//标题
//					Elements e=firstElementsLink.get(j).select("div[class=mCBkyc tNxQIb y355M JIFdL JQe2Ld nDgy9d]");
					Elements e=firstElementsLink.get(j).select("div[class=\"mCBkyc y355M JQe2Ld nDgy9d\"]");
					//链接
					Elements a=firstElementsLink.get(j).select("a");
					//Elements timespan=firstElementsLink.get(j).select("span[class=WG9SHc]");
					System.out.println(e.get(0).text());
					System.out.println(a.get(0).attr("href"));
					catchWebByMetaSearch.setTid(tid);
					catchWebByMetaSearch.setSummary(urlList.get(i));
					//catchWebByMetaSearch.setOrgId(orgId);
					catchWebByMetaSearch.setSourceaddress(a.get(0).attr("href"));
					catchWebByMetaSearch.setTitle(e.get(0).text());
					//来源
					String origin=firstElementsLink.get(j).select("div[class=\"CEMjEf NUnG9d\"]").text();
					catchWebByMetaSearch.setSourcesite(origin);
					metaSearchList.add(catchWebByMetaSearch);
				}
				catchWebByMetaSearchList.addAll(metaSearchList);
			}
			return catchWebByMetaSearchList;

		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}
}
