package com.zzsn.crawler;

import com.zzsn.conf.SpringContextUtil;
import com.zzsn.entity.*;

import com.zzsn.search.ChromeUtil;
import com.zzsn.extrator.ExtEntity;
import com.zzsn.util.*;

import lombok.extern.slf4j.Slf4j;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.ssl.TrustStrategy;
import org.apache.http.util.EntityUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.scheduling.annotation.Async;

import javax.net.ssl.SSLContext;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@Slf4j
public class MetaSearchThread  {

    public KeywordMsg keywordMsg;

//    @Override
//    public void run() {
//        crawler();
//    }

    public KeywordMsg getKeywordMsg() {
        return keywordMsg;
    }

    public void setKeywordMsg(KeywordMsg keywordMsg) {
        this.keywordMsg = keywordMsg;
    }
    public KafkaTemplate kafkaTemplate= SpringContextUtil.getBean(KafkaTemplate.class);

    @Async("asyncexecutorService")
    public void crawler(){
        String keyWord = keywordMsg.getKeyWord();
        List<String> keyWords = SplitKeyword.transForm(keyWord);
        for (String kWord :keyWords) {
            String url1= Constants.META_SEARCH_URL;
            String url = url1 + kWord;
            List<String> urlList = new ArrayList<String>();
            log.info("url：" + url);
            urlList.add(url);
//            String charset = locateCharSet(urlList.get(0));
            String charset = "utf-8";
            Long orgId=Long.parseLong(keywordMsg.getId());
            Long tid=Long.parseLong(keywordMsg.getId());
            String sid=keywordMsg.getId();
            String proxyid=Constants.PROXY;
            if(proxyid.equals("1")) {

            }else {
                List<ExtEntity> extEntitys=	ChromeUtil.getSougouweixinForOrgid1(kWord,orgId,tid);
                for (int i = 0; i < extEntitys.size(); i++) {

//                    sendExtentity(extEntitys.get(i), kw);
                }
            }
        }


    }

    private String locateCharSet(String url) {
        String encoding = "gbk";
        try {
            Connection conn = Jsoup.connect(url);
            conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36)");
            // 伪装成浏览器
            Document doc = conn.ignoreContentType(true).timeout(10000).get();
            Pattern p1 = Pattern.compile("<meta[^>]*>",
                    Pattern.CASE_INSENSITIVE);
            Matcher m1 = p1.matcher(doc.toString());
            while (m1.find()) {
                String str = m1.group();
                Pattern p2 = Pattern.compile("charset[^\\s||\"||;||'||>]*");
                Matcher m2 = p2.matcher(str);
                if (m2.find()) {
                    encoding = m2.group().substring(8);
                    if (encoding.trim().length() == 0) {
                        Pattern p3 = Pattern
                                .compile("charset=\"[^\\s||\"||;||>]*");
                        Matcher m3 = p3.matcher(str);
                        if (m3.find()) {
                            encoding = m3.group().substring(9);
                        }
                        if (encoding.trim().length() == 0) {
                            // encoding = DetectCharSet.detectCharSet(fileName);
                            // if(encoding == null){
                            encoding = "gbk";
                            // }
                        }
                    }

                    return encoding;
                }
            }
        } catch (IOException e) {
            // e.printStackTrace();
//            System.out.println("获取出错编码方式");
            log.info("获取出错编码方式");
            return encoding;
        }

        return encoding;
    }

    // 抓取新闻内容
//    private void CatchWebNews(List<CatchWebByMetaSearch> catchWebList,String keyword) {
//
//    }

    public boolean checkContentHavekeyword(String title,String content,String keywords){
        String text=title+content;
        boolean flag=true;
        String[] kwords = keywords.split("\\+");
        for (int i = 0; i <kwords.length ; i++) {
            if(!text.contains(kwords[i])){
              flag=false;
              break;
            }
        }
        return flag;
    }

    public ClbAnsProcessitem docInfoTrans2Processitem(DocInfo docInfo){
        ClbAnsProcessitem clbAnsProcessitem=new ClbAnsProcessitem();
        clbAnsProcessitem.setSid(docInfo.getSid()+"");
        clbAnsProcessitem.setTitle(docInfo.getTitle());
        clbAnsProcessitem.setContent(docInfo.getContentNoTag());
        clbAnsProcessitem.setContentWithtag(docInfo.getContentWithTag());
        clbAnsProcessitem.setSummary(docInfo.getSummary());
        clbAnsProcessitem.setAuthor(docInfo.getAuthor());
        clbAnsProcessitem.setOrigin(docInfo.getOrigin());
        clbAnsProcessitem.setPublishDate(docInfo.getPublishDate());
        clbAnsProcessitem.setSourceAddress(docInfo.getSourceaddress());

        return clbAnsProcessitem;
    }
    //转换qq新闻链接
    public static String transqqURl(String oldurl){
        String patt="https://new.qq.com/omn/[date]/[pamars].html";
        String b1=oldurl.substring(oldurl.lastIndexOf("/")+1);
        String b2=getNumbers(b1);
        String curl=patt.replace("[date]",b2).replace("[pamars]",b1);
        return curl;
    }
    public static String getNumbers(String content) {

        Pattern pattern = Pattern.compile("\\d+");
        Matcher matcher = pattern.matcher(content);
        while (matcher.find()) {
            return matcher.group(0);
        }
        return "";
    }

    //根据url获取html内容
    private  String getContentByUrl(String infourl){
        String infodata = "";
        String charset="utf-8";
        CloseableHttpClient httpClient =createSSLClientDefault();
        HttpGet httpgeturl = new HttpGet(infourl);// Get请求
        httpgeturl.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 60000);
//        httpgeturl.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, 60000);
        // 伪装成浏览器
        httpgeturl.setHeader("Content-Type","application/x-www-form-urlencoded;charset=utf-8");
        httpgeturl.setHeader("User-Agent", "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US);");
        httpgeturl.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        HttpResponse httprespse=null;
        try {
            httprespse = httpClient.execute(httpgeturl);
        } catch (Exception e1) {
            System.out.println("请求失败。。更换协议");
            httpClient=createSSLClientDefaulttsl12();
            try {
                httprespse = httpClient.execute(httpgeturl);
            }catch (Exception e2) {
                log.info(e2.getMessage());
                return infodata;
            }
        }
        HttpEntity entitydata = httprespse.getEntity();// 获取返回数据
        // 上次更新时间取得
        Header lastModify = httprespse.getFirstHeader("Last-Modified");
        if (lastModify == null) {
            lastModify = httprespse.getLastHeader("Last-Modified");
        }
        String charstype = EntityUtils.getContentCharSet(entitydata);
        if (charstype != null) {
            charset = charstype;
        } else {
            charset = locateCharSet(infourl);
        }
        charset = Utility.charsetcheck(charset);
        try {
            infodata = EntityUtils.toString(entitydata, charset);
        }catch (Exception e3){
            log.info(e3.getMessage());
            return infodata;
        }
        httpgeturl.releaseConnection();
        return infodata;
    }

    private  CloseableHttpClient createSSLClientDefault(){
        try {
            SSLContext sslContext = new SSLContextBuilder().loadTrustMaterial(null, new TrustStrategy() {
                //信任所有
                @Override
                public boolean isTrusted(
                        java.security.cert.X509Certificate[] arg0, String arg1)
                        throws java.security.cert.CertificateException {
                    // TODO Auto-generated method stub
                    return true;
                }

            }).build();
            SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext);
            return HttpClients.custom().setSSLSocketFactory(sslsf).build();
        } catch (KeyManagementException e) {
            e.printStackTrace();
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        } catch (KeyStoreException e) {
            e.printStackTrace();
        }
        return  HttpClients.createDefault();
    }
    private  CloseableHttpClient createSSLClientDefaulttsl12(){
        try {
            SSLContext sslContext = new SSLContextBuilder().useProtocol("TLSv1.2").loadTrustMaterial(null, new TrustStrategy() {
                //信任所有
                @Override
                public boolean isTrusted(
                        java.security.cert.X509Certificate[] arg0, String arg1)
                        throws java.security.cert.CertificateException {
                    // TODO Auto-generated method stub
                    return true;
                }

            }).build();
            SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext);
            return HttpClients.custom().setSSLSocketFactory(sslsf).build();
        } catch (KeyManagementException e) {
            e.printStackTrace();
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        } catch (KeyStoreException e) {
            e.printStackTrace();
        }
        return  HttpClients.createDefault();
    }
    /**
     * 爬取图片
     * 创建人:  李东亮
     * 创建时间:  2016-5-10 上午10:57:20
     * @version 1.0
     * @param contentWithTag
     * @param sourceaddress
     * @throws Exception
     */
    private ContentFileResult getContentFile(String contentWithTag,String sourceaddress)throws Exception{
        String contentImgCvtTag = contentWithTag;
        String formatImgContent= contentWithTag;
        Map<String, FileTag> imgDataMap = ContentFileFinder.getContentFileTag(contentWithTag,sourceaddress);
        //key为图片爬取路径，value为图片保存路径
        Map<String, FileTag> imgMap = new HashMap<String, FileTag>();
        for (String key : imgDataMap.keySet()) {
            FileTag fileTag = imgDataMap.get(key);
            while (contentImgCvtTag.contains(key)) {
                //IMG_SERVER开头的路径
                contentImgCvtTag = contentImgCvtTag.replace(key, fileTag.getSaveTag());
            }
//            while (formatImgContent.contains(fileTag.getAbsolutePath())&&!key.equals(fileTag.getAbsoluteTag())) {
//                //转换为绝对路径
//                formatImgContent = formatImgContent.replace(key, fileTag.getAbsoluteTag());
//            }
            imgMap.put(fileTag.getAbsolutePath(), fileTag);
        }

        ContentFileResult cis = new ContentFileResult();
        cis.setContentAbsoulute(formatImgContent);
        cis.setContentImgCvtTag(contentImgCvtTag);
        cis.setFileMap(imgMap);
        return cis;
    }

    public InputStream getImg(String dataUrl){
        CloseableHttpClient httpClient = createSSLClientDefault();

        CloseableHttpResponse response = null;
        InputStream instream =null;
        try {
            HttpGet get = new HttpGet();
            get.setURI(new URI(dataUrl));
            response = httpClient.execute(get);
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                //创建一个输入流对象
                instream = entity.getContent();

//                 BufferedReader reader = new BufferedReader(new InputStreamReader(instream));
//                 StringBuilder sb = new StringBuilder();
//                 String line = null;
//                 while ((line = reader.readLine()) != null) {
//                     sb.append(line + "\n");
//                 }
//                 instream.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        return instream;
    }

}
