package com.zzsn.local;


import com.fasterxml.jackson.databind.ObjectMapper;
import com.zzsn.utility.index.Constants;
import com.zzsn.utility.model.CatchWebByMetaSearch;
import com.zzsn.utility.util.ChromeUtil;
import com.zzsn.utility.util.DateUtil;
import com.zzsn.utility.util.Utility;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.springframework.kafka.core.KafkaTemplate;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

@Slf4j
public class SoRecorderUtil {

	// 提取搜狗新闻列表URL
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> catchWebOfSougouList(
			List<String> urlList, String charset, Long orgId, Long tid, KafkaTemplate kafkaTemplate) {
		try {
			List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();

			for (int i = 0; i < urlList.size(); i++) {
				URL url = new URL(urlList.get(i));
				URI uri = null;
				String uri_code = "";
				try {
					uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					uri_code = Utility.encodURI(uri.toString())
							.replaceAll("%2520", "+").replaceAll("%25", "%")
							.replaceAll("%20", "+");
				} catch (URISyntaxException e) {
					// TODO Auto-generated catch block
					e.printStackTrace();
				}
				Connection conn = Jsoup.connect(uri_code);
				conn.header("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50");
				Document doc = null;
				try {
					doc = conn.timeout(10000).get();
				} catch (Exception ex) {
					// ex.printStackTrace();
					System.out.println("360搜索中该关键词搜索没有相关新闻！");
					continue;
				}
				System.out.println("----360搜索----" + uri);
				Elements firstElementsLink = doc.select("li[data-from=\"news\"]");
				List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();
				CatchWebByMetaSearch catchWebByMetaSearch = new CatchWebByMetaSearch();
				for (int m=0;m<firstElementsLink.size();m++) {
					catchWebByMetaSearch = new CatchWebByMetaSearch();
					Elements orainAndDate = firstElementsLink.get(m).select("div[class=\"info b-info\"]");

					if (orainAndDate.size()>0) {
						String orainAndDatestr = orainAndDate.select("span:eq(1)").text();
						//发布时间
						String publishDate = DateUtil.getPublishDate(orainAndDatestr);
						catchWebByMetaSearch.setPublishDate(publishDate);

						//来源
//						String orin = orainAndDatestr.split(" ")[0].trim();
						String orin = orainAndDate.select("span:eq(0)").text();;
						catchWebByMetaSearch.setSourcesite(orin);
					}


					Elements titleAndUrl = firstElementsLink.get(m).select("a");
					if (titleAndUrl.size()>0) {
						//标题
						String title = titleAndUrl.get(0).attr("title");
						catchWebByMetaSearch.setTitle(title);
						//源网址
						String addressurl = titleAndUrl.get(0).absUrl("href");
						catchWebByMetaSearch.setSourceaddress(addressurl);

					}
					catchWebByMetaSearch.setOrgId(orgId);
					catchWebByMetaSearch.setTid(tid);
					catchWebByMetaSearch.setSid(tid);
					metaSearchList.add(catchWebByMetaSearch);
				}
				for (CatchWebByMetaSearch catchMetaSearch:metaSearchList){

					List<CatchWebByMetaSearch> catchWebByMetaSearches=new ArrayList<>();
					catchWebByMetaSearches.add(catchMetaSearch);
					DetailSoSearchThread detailSouGouSearchThread=new DetailSoSearchThread();

					detailSouGouSearchThread.CatchWebNews(catchWebByMetaSearches,"");
				}
				catchWebByMetaSearchList.addAll(metaSearchList);

			}
			return catchWebByMetaSearchList;

		}  catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}

	public static String sendGet(String url) {
		String result = "";
		String urlName = url;
		try {
			URL realURL = new URL(urlName);
			URLConnection conn = realURL.openConnection();
			conn.setRequestProperty("accept", "*/*");
			conn.setRequestProperty("connection", "Keep-Alive");
			conn.setRequestProperty("user-agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");
			conn.connect();
			Map<String, List<String>> map = conn.getHeaderFields();
			for (String s : map.keySet()) {
				System.out.println(s + "-->" + map.get(s));
			}
			BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8"));
			String line;
			while ((line = in.readLine()) != null) {
				result += "\n" + line;
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		return result;
	}

	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> catchWebOfGoogleList(
			List<String> urlList, String charset, Long orgId, Long tid) {
		try {
			List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
			for (int i = 0; i < urlList.size(); i++) {
				Thread.sleep(1000*2);
				CatchWebByMetaSearch catchWebByMetaSearch = new CatchWebByMetaSearch();
				List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();
				Document doc = null;
				System.out.println(urlList.get(i));
//				String docstr=ChromeUtil.getChromeDocnews(urlList.get(i),((i)*20)+"");
				String docstr=ChromeUtil.getChromeDoc(urlList.get(i));
				if(docstr==null){
					continue;
				}
				doc=Jsoup.parse(docstr);
				Elements firstElementsLink = doc.select("g-card[class=ftSUBd]");
				//若果没有结果则不循环
				if(firstElementsLink.size()==0){
					break;
				}
				String info = doc.toString();
				for (int j = 0; j < firstElementsLink.size(); j++) {
					catchWebByMetaSearch= new CatchWebByMetaSearch();
//					System.out.println(firstElementsLink.get(j).toString());
					//标题
//					Elements e=firstElementsLink.get(j).select("div[class=mCBkyc tNxQIb y355M JIFdL JQe2Ld nDgy9d]");
					Elements e=firstElementsLink.get(j).select("div[class=\"mCBkyc y355M JQe2Ld nDgy9d\"]");
					//链接
					Elements a=firstElementsLink.get(j).select("a");
					//Elements timespan=firstElementsLink.get(j).select("span[class=WG9SHc]");
					System.out.println(e.get(0).text());
					System.out.println(a.get(0).attr("href"));
					catchWebByMetaSearch.setTid(tid);
					catchWebByMetaSearch.setSummary(urlList.get(i));
					//catchWebByMetaSearch.setOrgId(orgId);
					catchWebByMetaSearch.setSourceaddress(a.get(0).attr("href"));
					catchWebByMetaSearch.setTitle(e.get(0).text());
					//来源
					String origin=firstElementsLink.get(j).select("div[class=\"CEMjEf NUnG9d\"]").text();
					catchWebByMetaSearch.setSourcesite(origin);
					metaSearchList.add(catchWebByMetaSearch);
				}
				catchWebByMetaSearchList.addAll(metaSearchList);
			}
			return catchWebByMetaSearchList;

		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfGoogle1(
			List<String> urlList, String charset, Long orgId, Long tid) {
		try {
			List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
			for (int i = 0; i < urlList.size(); i++) {
				Thread.sleep(1000*5);
				CatchWebByMetaSearch catchWebByMetaSearch = new CatchWebByMetaSearch();
				List<CatchWebByMetaSearch> metaSearchList = new ArrayList<CatchWebByMetaSearch>();
				Document doc = null;
				System.out.println(urlList.get(i));
				String docstr=ChromeUtil.getChromeDocnews(urlList.get(i),((i)*20+0)+"");
				if(docstr==null){
					continue;
				}
				doc=Jsoup.parse(docstr);
				Elements firstElementsLink = doc.select("g-card[class=ftSUBd]");
				//若果没有结果则不循环
				if(firstElementsLink.size()==0){
					break;
				}
				String info = doc.toString();
				for (int j = 0; j < firstElementsLink.size(); j++) {
					catchWebByMetaSearch= new CatchWebByMetaSearch();
//					System.out.println(firstElementsLink.get(j).toString());
					//标题
//					Elements e=firstElementsLink.get(j).select("div[class=mCBkyc tNxQIb y355M JIFdL JQe2Ld nDgy9d]");
					Elements e=firstElementsLink.get(j).select("div[class=\"mCBkyc y355M JQe2Ld nDgy9d\"]");
					//链接
					Elements a=firstElementsLink.get(j).select("a");
					//Elements timespan=firstElementsLink.get(j).select("span[class=WG9SHc]");
					System.out.println(e.get(0).text());
					System.out.println(a.get(0).attr("href"));
					catchWebByMetaSearch.setTid(tid);
					catchWebByMetaSearch.setSummary(urlList.get(i));
					//catchWebByMetaSearch.setOrgId(orgId);
					catchWebByMetaSearch.setSourceaddress(a.get(0).attr("href"));
					catchWebByMetaSearch.setTitle(e.get(0).text());
					//来源
					String origin=firstElementsLink.get(j).select("div[class=\"CEMjEf NUnG9d\"]").text();
					catchWebByMetaSearch.setSourcesite(origin);
					metaSearchList.add(catchWebByMetaSearch);
				}
				catchWebByMetaSearchList.addAll(metaSearchList);
			}
			return catchWebByMetaSearchList;

		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}
}
