package com.zzsn.search.util;

import com.zzsn.utility.model.CatchWebByMetaSearch;
import com.zzsn.utility.util.ChromeUtil;
import com.zzsn.utility.util.DateUtil;
import com.zzsn.utility.util.Utility;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.http.*;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.params.ConnRouteParams;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.util.EntityUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RecorderUtil {

	/**
	 * Extracts news entries from Baidu news result pages.
	 *
	 * <p>Each URL in {@code urlList} is rebuilt through {@link URI}, re-encoded with
	 * {@code Utility.encodURI}, fetched with jsoup ({@code timeout(10000)}) and every
	 * {@code div.result} element is turned into one record. The method sleeps two seconds
	 * after each successful fetch. A URL that cannot be parsed or encoded, or a page that
	 * cannot be fetched, is skipped so that it does not affect the other URLs.
	 *
	 * @param urlList result-page URLs to fetch
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid}
	 * @return the records collected from all pages; {@code null} only if a
	 *         {@code ParseException} is raised while the pages are processed
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfBaidu(
			List<String> urlList, String charset, Long orgId, Long tid) {
		List<CatchWebByMetaSearch> results = new ArrayList<CatchWebByMetaSearch>();
		try {
			for (int i = 0; i < urlList.size(); i++) {
				URI uri;
				String encodedUrl;
				try {
					URL url = new URL(urlList.get(i));
					uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					encodedUrl = Utility.encodURI(uri.toString())
							.replaceAll("%2520", "+").replaceAll("%25", "%")
							.replaceAll("%20", "+");
				} catch (URISyntaxException e) {
					// Without a usable address there is nothing to fetch; skip instead of
					// handing an empty string to Jsoup.connect (which rejects it).
					e.printStackTrace();
					continue;
				} catch (IOException e) {
					// Malformed input URL: skip it and keep the records gathered so far.
					e.printStackTrace();
					continue;
				}

				Document doc;
				try {
					doc = Jsoup.connect(encodedUrl)
							.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36)")
							.timeout(10000).get();
				} catch (Exception ex) {
					System.out.println("百度搜索中该关键词搜索没有相关新闻！");
					continue;
				}
				System.out.println("----百度搜索----" + uri);

				// Pause between requests; if interrupted, finish this page and then stop.
				boolean interrupted = false;
				try {
					Thread.sleep(2000L);
				} catch (InterruptedException e) {
					Thread.currentThread().interrupt();
					interrupted = true;
				}

				Elements hits = doc.select("div.result");
				for (int m = 0; m < hits.size(); m++) {
					CatchWebByMetaSearch record = new CatchWebByMetaSearch();

					Elements sourceAndDate = hits.get(m).select("p.c-author");
					if (sourceAndDate.size() > 0) {
						String sourceAndDateText = sourceAndDate.text();
						// publish date
						record.setPublishDate(DateUtil.getPublishDate(sourceAndDateText));
						// source site: first space-separated token
						record.setSourcesite(sourceAndDateText.split(" ")[0].trim());
					}

					Elements titleLinks = hits.get(m).select("a[data-click]");
					if (titleLinks.size() > 0) {
						// title
						record.setTitle(titleLinks.get(0).text().trim());
						// source address
						record.setSourceaddress(titleLinks.attr("href"));
					}

					record.setOrgId(orgId);
					record.setTid(tid);
					results.add(record);
				}

				if (interrupted) {
					break;
				}
			}
			return results;

		} catch (ParseException e) {
			e.printStackTrace();
		}
		return null;
	}

	
	/**
	 * Extracts news entries from Baidu result pages, fetching each page with the
	 * {@code HttpClient} returned by {@code getHttpClient()}.
	 *
	 * <p>Before every request the method sleeps 8.5 seconds. The response body is decoded
	 * as UTF-8, written to {@code D:\output333111.txt} (replacing any previous file) and
	 * parsed with jsoup; every {@code div.result-op} element becomes one record. A failed
	 * fetch or parse is retried, but only a bounded number of times per URL; a URL that
	 * cannot be parsed or encoded is skipped without any request.
	 *
	 * @param urlList result-page URLs to fetch
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid}
	 * @return the collected records (possibly partial if the thread is interrupted);
	 *         {@code null} if an unexpected exception escapes the per-URL handling
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfBaiduByProxy(
			List<String> urlList, String charset, Long orgId, Long tid) {
		// Upper bound on tries per URL; retrying without a limit never terminates
		// when the failure is permanent.
		final int maxAttempts = 3;
		try {
			List<CatchWebByMetaSearch> results = new ArrayList<CatchWebByMetaSearch>();
			for (int i = 0; i < urlList.size(); i++) {
				String encodedUrl;
				try {
					URL url = new URL(urlList.get(i));
					URI uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					encodedUrl = Utility.encodURI(uri.toString())
							.replaceAll("%2520", "+").replaceAll("%25", "%")
							.replaceAll("%20", "+");
				} catch (Exception e) {
					// A URL that cannot be built will not improve on retry: skip it.
					e.printStackTrace();
					continue;
				}

				for (int attempt = 1; attempt <= maxAttempts; attempt++) {
					HttpClient client = null;
					HttpGet httpGet = null;
					try {
						Thread.sleep(8500L);
						client = getHttpClient();
						httpGet = new HttpGet(encodedUrl);
						httpGet.getParams().setIntParameter(
								CoreConnectionPNames.CONNECTION_TIMEOUT, 60000);
						httpGet.getParams().setParameter(
								HttpMethodParams.SO_TIMEOUT, 60000);
						// present the request as coming from a browser
						httpGet.setHeader("Content-Type",
								"application/x-www-form-urlencoded;charset=utf-8");
						httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US);");
						httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
						httpGet.setHeader(HttpHeaders.CONNECTION, "close");

						HttpResponse response = client.execute(httpGet);
						HttpEntity resEntity = response.getEntity();
						String html = EntityUtils.toString(resEntity, "UTF-8");

						// NOTE(review): hard-coded dump of the raw page, looks like leftover
						// debugging output — confirm whether it is still needed.
						File dumpFile = new File("D:\\output333111.txt");
						if (dumpFile.exists()) {
							dumpFile.delete();
						}
						BufferedWriter writer = null;
						try {
							writer = new BufferedWriter(new FileWriter(dumpFile, true));
							writer.write("标题：" + html + "\r\n");
							writer.flush();
						} catch (IOException e) {
							e.printStackTrace();
						} finally {
							// Close even when the write fails so the file handle is not leaked.
							if (writer != null) {
								try {
									writer.close();
								} catch (IOException e) {
									e.printStackTrace();
								}
							}
						}

						Document doc = Jsoup.parse(html);
						System.out.println("----百度搜索----" + urlList.get(i));

						Elements hits = doc.select("div.result-op");
						// Collected per page so a failure mid-page adds nothing to the result.
						List<CatchWebByMetaSearch> pageRecords = new ArrayList<CatchWebByMetaSearch>();
						for (int m = 0; m < hits.size(); m++) {
							CatchWebByMetaSearch record = new CatchWebByMetaSearch();

							Elements sourceAndDate = hits.get(m).select("span");
							if (sourceAndDate.size() > 0) {
								// publish date
								record.setPublishDate(DateUtil.getPublishDate(sourceAndDate.text()));
								// source site: text of the first span
								record.setSourcesite(sourceAndDate.get(0).text());
							}

							Elements titleLinks = hits.get(m).select("a[data-click]");
							if (titleLinks.size() > 0) {
								// title
								record.setTitle(titleLinks.get(0).text().trim());
								// source address
								String address = titleLinks.attr("href");
								record.setSourceaddress(address);
								System.out.println(address);
							}

							record.setOrgId(orgId);
							record.setTid(tid);
							pageRecords.add(record);
						}
						results.addAll(pageRecords);
						break; // page handled, no further attempt needed
					} catch (InterruptedException e) {
						// Preserve the interrupt for the caller and stop scraping.
						Thread.currentThread().interrupt();
						return results;
					} catch (Exception e) {
						// Fetch or parse failed; fall through to the next attempt (if any).
						e.printStackTrace();
					} finally {
						if (null != httpGet) {
							httpGet.abort();
						}
						if (null != client) {
							client.getConnectionManager().closeIdleConnections(0,
									TimeUnit.MICROSECONDS);
						}
					}
				}
			}
			return results;

		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}
	/**
	 * Extracts news entries from Baidu result pages whose HTML is obtained through
	 * {@code ChromeUtil.getChromeDoc}.
	 *
	 * <p>Every {@code div.result-op} element of a page becomes one record; {@code tid} is
	 * stored both via {@code setTid} and via {@code setSid}.
	 *
	 * @param urlList result-page URLs to load
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid} and {@code setSid}
	 * @return the records of all pages, or {@code null} if a {@code ParseException} is caught
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfBaidu1(
			List<String> urlList, String charset, Long orgId, Long tid) {
		try {
			List<CatchWebByMetaSearch> collected = new ArrayList<CatchWebByMetaSearch>();
			for (String pageUrl : urlList) {
				Document page = Jsoup.parse(ChromeUtil.getChromeDoc(pageUrl));
				System.out.println("----百度搜索----" + pageUrl);

				Elements hits = page.select("div.result-op");
				for (int idx = 0; idx < hits.size(); idx++) {
					CatchWebByMetaSearch record = new CatchWebByMetaSearch();

					Elements spans = hits.get(idx).select("span");
					if (spans.size() > 0) {
						// publish date, derived from the text of all spans
						record.setPublishDate(DateUtil.getPublishDate(spans.text()));
						// source site, text of the first span
						record.setSourcesite(spans.get(0).text());
					}

					Elements anchors = hits.get(idx).select("a[data-click]");
					if (anchors.size() > 0) {
						// title
						record.setTitle(anchors.get(0).text().trim());
						// source address
						String link = anchors.attr("href");
						record.setSourceaddress(link);
						System.out.println(link);
					}

					record.setOrgId(orgId);
					record.setTid(tid);
					record.setSid(tid);
					collected.add(record);
				}
			}
			return collected;
		} catch (ParseException e) {
			e.printStackTrace();
			return null;
		}
	}
	/**
	 * Extracts result entries from Baidu home-page search result pages.
	 *
	 * <p>Each {@code div.result} element yields one record. When the element has an
	 * {@code a[data-click]} link, the record is kept only if it has a non-blank title and
	 * its link can be resolved to a non-blank target by {@link #getRealUrlFromBaiduUrl(String)};
	 * elements without such a link are kept with only {@code orgId}/{@code tid} set.
	 * A URL that cannot be parsed or encoded, or a page that cannot be fetched, is skipped.
	 *
	 * @param urlList result-page URLs to fetch
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid}
	 * @return the records collected from all pages; {@code null} only if a
	 *         {@code ParseException} is raised while the pages are processed
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfBaiduHomeSearch(
			List<String> urlList, String charset, Long orgId, Long tid) {
		List<CatchWebByMetaSearch> results = new ArrayList<CatchWebByMetaSearch>();
		try {
			for (int i = 0; i < urlList.size(); i++) {
				System.out.println("url"+urlList.size());
				URI uri;
				String encodedUrl;
				try {
					URL url = new URL(urlList.get(i));
					uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					encodedUrl = Utility.encodURI(uri.toString())
							.replaceAll("%2520", "+").replaceAll("%25", "%")
							.replaceAll("%20", "+");
				} catch (URISyntaxException e) {
					// Nothing to fetch without an address; do not pass "" to Jsoup.connect.
					e.printStackTrace();
					continue;
				} catch (IOException e) {
					// Malformed input URL: skip it and keep the records gathered so far.
					e.printStackTrace();
					continue;
				}

				Document doc;
				try {
					doc = Jsoup.connect(encodedUrl)
							.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36)")
							.timeout(10000).get();
				} catch (Exception ex) {
					System.out.println("百度首页搜索中该关键词搜索没有相关新闻！");
					continue;
				}
				System.out.println("----百度首页搜索----" + uri);

				Elements hits = doc.select("div.result");
				for (int m = 0; m < hits.size(); m++) {
					CatchWebByMetaSearch record = new CatchWebByMetaSearch();

					Elements titleLinks = hits.get(m).select("a[data-click]");
					if (titleLinks.size() > 0) {
						// title, with any markup stripped
						String title = titleLinks.get(0).text().trim();
						if (title.length() == 0) {
							continue;
						}
						title = title.replaceAll("</?[^>]+>", "");
						if (title.trim().length() == 0) {
							continue;
						}
						record.setTitle(title);

						// source address: resolve Baidu's redirect link to its target
						String redirectUrl = titleLinks.attr("href");
						if (redirectUrl == null || redirectUrl.trim().length() == 0) {
							continue;
						}
						String realUrl;
						try {
							realUrl = getRealUrlFromBaiduUrl(redirectUrl);
						} catch (IllegalArgumentException e) {
							// jsoup rejects links it cannot turn into a URL; drop this
							// entry instead of aborting the whole scrape.
							e.printStackTrace();
							continue;
						}
						if (realUrl == null || realUrl.trim().length() == 0) {
							continue;
						}
						record.setSourceaddress(realUrl);
					}

					record.setOrgId(orgId);
					record.setTid(tid);
					results.add(record);
				}
			}
			return results;

		} catch (ParseException e) {
			e.printStackTrace();
		}
		return null;
	}
	
    /**
     * Requests {@code url} with redirects disabled and reports where the response points.
     *
     * @param url the link to request
     * @return the value of the response's {@code Location} header (as returned by jsoup,
     *         which may be {@code null} when the header is absent), or an empty string
     *         if the request fails with an {@link IOException}
     */
    public static String getRealUrlFromBaiduUrl(String url) {
        final int timeoutMillis = 60000;
        try {
            Connection request = Jsoup.connect(url)
                    .timeout(timeoutMillis)
                    .method(Connection.Method.GET)
                    .followRedirects(false);
            Connection.Response response = request.execute();
            return response.header("Location");
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }
	
	/**
	 * Extracts news entries from Sogou news result pages.
	 *
	 * <p>Each {@code div.vrwrap} element of a fetched page becomes one record. A URL that
	 * cannot be parsed or encoded, or a page that cannot be fetched, is skipped so that it
	 * does not affect the other URLs.
	 *
	 * @param urlList result-page URLs to fetch
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid}
	 * @return the records collected from all pages; {@code null} only if a
	 *         {@code ParseException} is raised while the pages are processed
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfSougou(
			List<String> urlList, String charset, Long orgId, Long tid) {
		List<CatchWebByMetaSearch> results = new ArrayList<CatchWebByMetaSearch>();
		try {
			for (int i = 0; i < urlList.size(); i++) {
				URI uri;
				String encodedUrl;
				try {
					URL url = new URL(urlList.get(i));
					uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					encodedUrl = Utility.encodURI(uri.toString())
							.replaceAll("%2520", "+").replaceAll("%25", "%")
							.replaceAll("%20", "+");
				} catch (URISyntaxException e) {
					// Nothing to fetch without an address; do not pass "" to Jsoup.connect.
					e.printStackTrace();
					continue;
				} catch (IOException e) {
					// Malformed input URL: skip it and keep the records gathered so far.
					e.printStackTrace();
					continue;
				}

				Document doc;
				try {
					doc = Jsoup.connect(encodedUrl)
							.header("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50")
							.timeout(10000).get();
				} catch (Exception ex) {
					System.out.println("搜狗搜索中该关键词搜索没有相关新闻！");
					continue;
				}
				System.out.println("----搜狗搜索----" + uri);

				Elements hits = doc.select("div.vrwrap");
				for (int m = 0; m < hits.size(); m++) {
					CatchWebByMetaSearch record = new CatchWebByMetaSearch();

					Elements sourceAndDate = hits.get(m).select("p.news-from");
					if (sourceAndDate.size() > 0) {
						String sourceAndDateText = sourceAndDate.text();
						// publish date
						record.setPublishDate(DateUtil.getPublishDate(sourceAndDateText));
						// source site: first space-separated token
						record.setSourcesite(sourceAndDateText.split(" ")[0].trim());
					}

					Elements links = hits.get(m).select("a");
					if (links.size() > 0) {
						// title
						record.setTitle(links.get(0).text().trim());
						// source address
						record.setSourceaddress(links.get(0).attr("href"));
					}

					record.setOrgId(orgId);
					record.setTid(tid);
					results.add(record);
				}
			}
			return results;

		} catch (ParseException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Extracts news entries from Haosou news result pages.
	 *
	 * <p>Each {@code li[data-from="news"]} element of a fetched page becomes one record.
	 * A URL that cannot be parsed or encoded, or a page that cannot be fetched, is skipped
	 * so that it does not affect the other URLs.
	 *
	 * @param urlList result-page URLs to fetch
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid}
	 * @return the records collected from all pages; {@code null} only if a
	 *         {@code ParseException} is raised while the pages are processed
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfHaosou(
			List<String> urlList, String charset, Long orgId, Long tid) {
		List<CatchWebByMetaSearch> results = new ArrayList<CatchWebByMetaSearch>();
		try {
			for (int i = 0; i < urlList.size(); i++) {
				URI uri;
				String encodedUrl;
				try {
					URL url = new URL(urlList.get(i));
					uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					encodedUrl = Utility.encodURI(uri.toString())
							.replaceAll("%2520", "+").replaceAll("%25", "%")
							.replaceAll("%20", "+");
				} catch (URISyntaxException e) {
					// Nothing to fetch without an address; do not pass "" to Jsoup.connect.
					e.printStackTrace();
					continue;
				} catch (IOException e) {
					// Malformed input URL: skip it and keep the records gathered so far.
					e.printStackTrace();
					continue;
				}

				Document doc;
				try {
					doc = Jsoup.connect(encodedUrl)
							.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36)")
							.timeout(10000).get();
				} catch (Exception ex) {
					System.out.println("好搜搜索中该关键词搜索没有相关新闻！");
					continue;
				}
				System.out.println("----好搜搜索----" + uri);

				Elements hits = doc.select("li[data-from=\"news\"]");
				for (int m = 0; m < hits.size(); m++) {
					CatchWebByMetaSearch record = new CatchWebByMetaSearch();

					Elements siteName = hits.get(m).select("span.sitename");
					if (siteName.size() > 0) {
						// source site
						record.setSourcesite(siteName.text());
					}

					Elements postTime = hits.get(m).select("span.posttime");
					if (postTime.size() > 0) {
						// publish date, taken from the title attribute
						record.setPublishDate(DateUtil.getPublishDate(postTime.attr("title")));
					}

					Elements links = hits.get(m).select("a");
					if (links.size() > 0) {
						// title: combined text of all links in the item
						// NOTE(review): sibling scrapers use only the first link's text —
						// confirm the combined text is intended here.
						record.setTitle(links.text().trim());
						// source address
						record.setSourceaddress(links.attr("href"));
					}

					record.setOrgId(orgId);
					record.setTid(tid);
					results.add(record);
				}
			}
			return results;

		} catch (ParseException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Extracts news entries from Chinaso news result pages.
	 *
	 * <p>Each {@code li[class="reItem "]} element of a fetched page becomes one record.
	 * A URL that cannot be parsed or encoded, or a page that cannot be fetched, is skipped
	 * so that it does not affect the other URLs.
	 *
	 * @param urlList result-page URLs to fetch
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid}
	 * @return the records collected from all pages; {@code null} only if a
	 *         {@code ParseException} is raised while the pages are processed
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfChinaso(
			List<String> urlList, String charset, Long orgId, Long tid) {
		List<CatchWebByMetaSearch> results = new ArrayList<CatchWebByMetaSearch>();
		try {
			for (int i = 0; i < urlList.size(); i++) {
				URI uri;
				String encodedUrl;
				try {
					URL url = new URL(urlList.get(i));
					uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					encodedUrl = Utility.encodURI(uri.toString())
							.replaceAll("%2520", "+").replaceAll("%25", "%")
							.replaceAll("%20", "+");
				} catch (URISyntaxException e) {
					// Nothing to fetch without an address; do not pass "" to Jsoup.connect.
					e.printStackTrace();
					continue;
				} catch (IOException e) {
					// Malformed input URL: skip it and keep the records gathered so far.
					e.printStackTrace();
					continue;
				}
				System.out.println("----中国搜索----" + uri);

				Document doc;
				try {
					doc = Jsoup.connect(encodedUrl)
							.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36)")
							.timeout(10000).get();
				} catch (Exception ex) {
					System.out.println("中国搜索中该关键词搜索没有相关新闻！");
					continue;
				}

				Elements hits = doc.select("li[class=\"reItem \"]");
				for (int m = 0; m < hits.size(); m++) {
					CatchWebByMetaSearch record = new CatchWebByMetaSearch();

					Elements snapshot = hits.get(m).select("p[class=\"snapshot\"]");
					if (snapshot.size() > 0) {
						String snapshotText = snapshot.text();
						// publish date
						record.setPublishDate(DateUtil.getPublishDate(snapshotText));
						// source site: the snapshot text with digits and hyphens removed
						record.setSourcesite(
								snapshotText.replaceAll("\\d", "").replaceAll("-", "").trim());
					}

					// Title and link are only read when the item contains an h2 heading.
					if (hits.get(m).select("h2").size() > 0) {
						Elements links = hits.get(m).select("a");
						if (links.size() > 0) {
							// title
							record.setTitle(links.get(0).text().trim());
							// source address
							record.setSourceaddress(links.attr("href"));
						}
					}

					record.setOrgId(orgId);
					record.setTid(tid);
					results.add(record);
				}
			}
			return results;

		} catch (ParseException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Extracts news entries from Yahoo news search result pages.
	 *
	 * <p>For every fetched page, each {@code li} inside
	 * {@code ol[class=" reg searchCenterMiddle"]} becomes one record (no publish date is
	 * extracted). A URL that cannot be parsed or encoded, or a page that cannot be fetched,
	 * is skipped so that it does not stop the remaining URLs from being processed.
	 *
	 * @param urlList result-page URLs to fetch
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid}
	 * @return the records collected so far; never {@code null}, even if an unexpected
	 *         exception ends processing early
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfYahoo(
			List<String> urlList, String charset, Long orgId, Long tid) {
		List<CatchWebByMetaSearch> results = new ArrayList<CatchWebByMetaSearch>();
		try {
			for (int i = 0; i < urlList.size(); i++) {
				URI uri;
				String encodedUrl;
				try {
					URL url = new URL(urlList.get(i));
					uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					encodedUrl = Utility.encodURI(uri.toString()).replaceAll(
							"%2520", "+");
				} catch (Exception e) {
					// Unusable URL: skip only this entry rather than ending the whole run.
					e.printStackTrace();
					continue;
				}
				System.out.println("----雅虎搜索----" + uri);

				Document doc;
				try {
					doc = Jsoup.connect(encodedUrl)
							.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36)")
							.timeout(10000).get();
				} catch (Exception ex) {
					System.out.println("雅虎搜索中该关键词搜索没有相关新闻！");
					continue;
				}

				Elements resultLists = doc.select("ol[class=\" reg searchCenterMiddle\"]");
				if (resultLists.size() > 0) {
					Elements hits = resultLists.select("li");
					for (int m = 0; m < hits.size(); m++) {
						CatchWebByMetaSearch record = new CatchWebByMetaSearch();

						Elements cite = hits.get(m).select("span.cite");
						if (cite.size() > 0) {
							// source site
							record.setSourcesite(cite.text());
						}

						Elements titleBox = hits.get(m).select("div.compTitle");
						if (titleBox.size() > 0) {
							Elements links = titleBox.select("a");
							if (links.size() > 0) {
								// title
								record.setTitle(links.get(0).text().trim());
								// source address
								record.setSourceaddress(links.attr("href"));
							}
						}

						record.setOrgId(orgId);
						record.setTid(tid);
						results.add(record);
					}
				}
			}
		} catch (Exception e) {
			// Best effort: report the problem and hand back what was collected.
			e.printStackTrace();
		}
		return results;
	}

	/**
	 * Extracts web search entries from Yahoo result pages using regular expressions on
	 * the page's serialized HTML.
	 *
	 * <p>Every match of the {@code <div class="w" ...} block pattern becomes one record
	 * whose address and title come from the block's {@code h3} section and whose summary
	 * comes from its first {@code p} section. A URL that cannot be parsed or encoded, or
	 * a page that cannot be fetched, is skipped without stopping the remaining URLs.
	 *
	 * @param urlList result-page URLs to fetch
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid}
	 * @return the records collected so far; never {@code null}, even if an unexpected
	 *         exception ends processing early
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfYahooWeb(
			List<String> urlList, String charset, Long orgId, Long tid) {
		List<CatchWebByMetaSearch> results = new ArrayList<CatchWebByMetaSearch>();
		try {
			// Compiled once per call instead of once per match.
			final Pattern blockPattern = Pattern
					.compile("<div class=\"w\"[\\s\\S]+?(?=./div>)+[\\s\\S]+?(?=./div>)+[\\s\\S]+?(?=./div>)");
			final Pattern headingPattern = Pattern.compile("h3>[\\s\\S]+?(?=./h3>)");
			final Pattern hrefPattern = Pattern.compile("href=\"http[^\"]+");
			final Pattern titlePattern = Pattern.compile("\">[\\s\\S]+?(?=./a>)");
			final Pattern summaryPattern = Pattern.compile("p>[\\s\\S]+?(?=./p>)");

			for (int i = 0; i < urlList.size(); i++) {
				URI uri;
				String encodedUrl;
				try {
					URL url = new URL(urlList.get(i));
					uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					encodedUrl = Utility.encodURI(uri.toString()).replaceAll(
							"%2520", "+");
				} catch (Exception e) {
					// Unusable URL: skip only this entry rather than ending the whole run.
					e.printStackTrace();
					continue;
				}
				System.out.println("----雅虎Web搜索----" + uri);

				Document doc;
				try {
					doc = Jsoup
							.connect(encodedUrl)
							.userAgent(
									"Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)")
							.ignoreContentType(true).timeout(10000).get();
				} catch (Exception ex) {
					System.out.println("雅虎Web搜索中该关键词搜索没有相关新闻！");
					continue;
				}

				Matcher blocks = blockPattern.matcher(doc.toString());
				while (blocks.find()) {
					String block = blocks.group();
					CatchWebByMetaSearch record = new CatchWebByMetaSearch();

					Matcher heading = headingPattern.matcher(block);
					if (heading.find()) {
						String headingHtml = heading.group();

						// address: drop the leading href=" (6 characters)
						Matcher href = hrefPattern.matcher(headingHtml);
						if (href.find() && href.group().length() > 6) {
							record.setSourceaddress(href.group().substring(6));
						}

						// title: drop the leading "> (2 characters) and emphasis tags
						Matcher titleMatcher = titlePattern.matcher(headingHtml);
						if (titleMatcher.find() && titleMatcher.group().length() > 2) {
							String title = titleMatcher.group().substring(2)
									.replaceAll("<b>", "")
									.replaceAll("</b>", "")
									.replaceAll("<strong>", "")
									.replaceAll("</strong>", "")
									.replaceAll("\"", "");
							record.setTitle(title);
						}
					}

					// summary: drop the leading p> (2 characters) and emphasis tags
					Matcher summaryMatcher = summaryPattern.matcher(block);
					if (summaryMatcher.find() && summaryMatcher.group().length() > 2) {
						String summary = summaryMatcher.group().substring(2)
								.replaceAll("<b>", "")
								.replaceAll("</b>", "")
								.replaceAll("<strong>", "")
								.replaceAll("</strong>", "")
								.replaceAll("\"", "");
						record.setSummary(summary);
					}

					record.setOrgId(orgId);
					record.setTid(tid);
					results.add(record);
				}
			}
		} catch (Exception e) {
			// Best effort: report the problem and hand back what was collected.
			e.printStackTrace();
		}
		return results;
	}



	/**
	 * Extracts news entries from ifeng (Phoenix) search result pages using regular
	 * expressions on the page's serialized HTML.
	 *
	 * <p>Every match of the {@code <div class="searchResults" ...} block pattern becomes
	 * one record with address, title, summary and publish date taken from the block when
	 * present; the source site is always set to {@code "凤凰资讯"}. A URL that cannot be
	 * parsed or encoded, or a page that cannot be fetched, is skipped so that it does not
	 * affect the other URLs.
	 *
	 * @param urlList result-page URLs to fetch
	 * @param charset not used by this method
	 * @param orgId   value stored on every record via {@code setOrgId}
	 * @param tid     value stored on every record via {@code setTid}
	 * @return the records collected from all pages; {@code null} only if a
	 *         {@code ParseException} is raised while the pages are processed
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfIFeng(
			List<String> urlList, String charset, Long orgId, Long tid) {
		List<CatchWebByMetaSearch> results = new ArrayList<CatchWebByMetaSearch>();
		try {
			// Compiled once per call instead of once per match.
			final Pattern blockPattern = Pattern
					.compile("<div class=\"searchResults\"[\\s\\S]+?(?=./div>)");
			final Pattern urlPattern = Pattern.compile("a href=\"http:[^\"]+");
			final Pattern titlePattern = Pattern.compile("blank\">[\\s\\S]+?(?=./a>)");
			final Pattern summaryPattern = Pattern.compile("p>原标题：[\\s\\S]+?(?=./p>)");
			final Pattern publishDatePattern = Pattern.compile("凤凰资讯[\\s\\S]+?(?=.</font>)");

			for (int i = 0; i < urlList.size(); i++) {
				URI uri;
				String encodedUrl;
				try {
					URL url = new URL(urlList.get(i));
					uri = new URI(url.getProtocol(), url.getHost(),
							url.getPath(), url.getQuery(), null);
					encodedUrl = Utility.encodURI(uri.toString())
							.replaceAll("%2520", "+").replaceAll("%25", "%")
							.replaceAll("%20", "+");
				} catch (URISyntaxException e) {
					// Nothing to fetch without an address; skip this entry.
					e.printStackTrace();
					continue;
				} catch (IOException e) {
					// Malformed input URL: skip it and keep the records gathered so far.
					e.printStackTrace();
					continue;
				}

				Document doc;
				try {
					doc = Jsoup
							.connect(encodedUrl)
							.userAgent(
									"Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)")
							.ignoreContentType(true).timeout(10000).get();
				} catch (Exception ex) {
					System.out.println("凤凰搜索中该关键词搜索没有相关新闻！");
					continue;
				}
				System.out.println("----凤凰搜索----" + uri);

				Matcher blocks = blockPattern.matcher(doc.toString());
				while (blocks.find()) {
					String block = blocks.group();
					CatchWebByMetaSearch record = new CatchWebByMetaSearch();

					// address: drop the leading a href=" (8 characters)
					Matcher urlMatcher = urlPattern.matcher(block);
					if (urlMatcher.find() && urlMatcher.group().length() > 8) {
						record.setSourceaddress(urlMatcher.group().substring(8));
					}

					// title: drop the leading blank"> (7 characters) and highlight markup
					Matcher titleMatcher = titlePattern.matcher(block);
					if (titleMatcher.find() && titleMatcher.group().length() > 7) {
						String title = titleMatcher.group().substring(7)
								.replaceAll("<font color=\"red\">", "")
								.replaceAll("</font>", "")
								.replaceAll("\\n", "");
						record.setTitle(title);
					}

					// summary: drop the leading 6-character prefix matched by the pattern
					Matcher summaryMatcher = summaryPattern.matcher(block);
					if (summaryMatcher.find() && summaryMatcher.group().length() > 6) {
						String summary = summaryMatcher.group().substring(6)
								.replaceAll("</p>", "")
								.replaceAll("\\n", "");
						record.setSummary(summary);
					}

					// source site
					record.setSourcesite("凤凰资讯");

					// publish date
					Matcher publishDateMatcher = publishDatePattern.matcher(block);
					if (publishDateMatcher.find()) {
						String publishDate = DateUtil.getPublishDate(publishDateMatcher.group());
						if (publishDate != null && publishDate.length() > 0) {
							record.setPublishDate(publishDate);
						}
					}

					record.setOrgId(orgId);
					record.setTid(tid);
					results.add(record);
				}
			}
			return results;

		} catch (ParseException e) {
			e.printStackTrace();
		}
		return null;
	}
	 /**
	  * Issues an HTTP GET for {@code dataUrl} using the proxied client returned by
	  * {@code getHttpClient()}.
	  *
	  * <p>The call waits 500 ms before sending, applies 60 second connect and socket
	  * timeouts, and sends a {@code Connection: close} header. The response is returned
	  * as-is; nothing is consumed or closed here.
	  *
	  * @param dataUrl URL to request
	  * @return the response of the executed request, or {@code null} when the request
	  *         could not be built or executed (the exception is printed, not rethrown)
	  */
	 public static HttpResponse getMethod2(String dataUrl) {
	 	try {
	 		HttpClient client = getHttpClient();
	 		HttpGet httpGet = new HttpGet(dataUrl);
	 		Thread.sleep(500L);
	 		httpGet.getParams().setIntParameter(
	 				CoreConnectionPNames.CONNECTION_TIMEOUT, 60000);
	 		// HttpClient 4 constant for the "http.socket.timeout" key; the original used the
	 		// commons-httpclient 3.x HttpMethodParams constant, which names the same key.
	 		httpGet.getParams().setIntParameter(
	 				CoreConnectionPNames.SO_TIMEOUT, 60000);
	 		// Disguise the request as a browser form submission.
	 		httpGet.setHeader("Content-Type",
	 				"application/x-www-form-urlencoded;charset=utf-8");
	 		httpGet.setHeader(HttpHeaders.CONNECTION, "close");
	 		return client.execute(httpGet);
	 	} catch (InterruptedException e) {
	 		// Keep the interrupt visible to the caller instead of silently clearing it.
	 		Thread.currentThread().interrupt();
	 		e.printStackTrace();
	 	} catch (Exception e) {
	 		e.printStackTrace();
	 	}
	 	return null;
	 }

	//            106.111.73.25-35946-hys_81310170_41c8-12345678
//            60.184.197.64-34013-hys_81310170_41c8-12345678
//            114.99.221.245-39604-hys_81310170_41c8-12345678
//            49.82.130.253-52312-hys_81310170_41c8-12345678
	/** Default proxy host address. */
	public static String PROXY_ADDR = "114.99.221.245";
	/** Default proxy port. */
	public static int PROXY_PORT = 39604;
	/**
	 * Picks one of the built-in proxy descriptors uniformly at random.
	 *
	 * @return a descriptor made of four {@code -}-separated fields; {@code getHttpClient()}
	 *         reads them as host, port, user name and password
	 */
	public static String getProxyIp() {
		final String[] candidates = {
				"106.111.73.25-35946-hys_81310170_41c8-12345678",
				"60.184.197.64-34013-hys_81310170_41c8-12345678",
				"114.99.221.245-39604-hys_81310170_41c8-12345678",
				"49.82.130.253-52312-hys_81310170_41c8-12345678"
		};
		final int pick = new Random().nextInt(candidates.length);
		return candidates[pick];
	}
	 /**
	  * Creates an HTTP client whose default route goes through a proxy taken from
	  * {@code getProxyIp()}, with that proxy's credentials registered.
	  *
	  * @return a new {@code DefaultHttpClient} configured with the proxy and its credentials
	  */
	 public static HttpClient getHttpClient() {
//		 List<ProxySite> s=DBServiceFactory.getProxyService().querybyIds(Constants.PROXYID);
//		 String[] proxys=s.get(0).getProxy().split("-");
	 	// Descriptor fields, in order: host, port, user name, password.
	 	final String[] parts = getProxyIp().split("-");
	 	final String host = parts[0];
	 	final int port = Integer.parseInt(parts[1]);
	 	final UsernamePasswordCredentials credentials =
	 			new UsernamePasswordCredentials(parts[2], parts[3]);

	 	final DefaultHttpClient proxiedClient = new DefaultHttpClient();
	 	// Credentials are scoped to the proxy endpoint only.
	 	proxiedClient.getCredentialsProvider().setCredentials(
	 			new AuthScope(host, port), credentials);
	 	proxiedClient.getParams().setParameter(
	 			ConnRouteParams.DEFAULT_PROXY, new HttpHost(host, port));
	 	return proxiedClient;
	 }
	 
	 
	/**
	 * Extracts result entries from Baidu web-search result pages.
	 *
	 * <p>For each URL the page source is obtained from {@code ChromeUtil.getChromeDoc},
	 * parsed with jsoup, and every {@code div.result} element becomes one
	 * {@link CatchWebByMetaSearch}:
	 * <ul>
	 *   <li>publish date: the combined text of the entry's {@code span} elements passed
	 *       through {@code DateUtil.getPublishDate} (set only when a span exists);</li>
	 *   <li>title and source address: text and {@code href} of the
	 *       {@code a[data-click]} match (set only when one exists).</li>
	 * </ul>
	 * URLs for which no page source is returned are skipped.
	 *
	 * @param urlList result-page URLs to process
	 * @param charset not used by this method
	 * @param orgId   value stored on every entry via {@code setOrgId}
	 * @param tid     value stored on every entry via {@code setTid}
	 * @return the entries of all pages in encounter order, or {@code null} when a
	 *         {@code ParseException} is caught
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfBaidusosuo(
			List<String> urlList, String charset, Long orgId, Long tid) {
		try {
			List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
			for (int i = 0; i < urlList.size(); i++) {
				String pageUrl = urlList.get(i);
				String docstr = ChromeUtil.getChromeDoc(pageUrl);
				if (docstr == null) {
					// Nothing to parse for this URL; skip it the same way
					// CatchWebOfGoogle does instead of failing inside Jsoup.parse.
					continue;
				}
				Document doc = Jsoup.parse(docstr);

				System.out.println("----百度搜索----" + pageUrl);
				Elements results = doc.select("div.result");
				for (int m = 0; m < results.size(); m++) {
					CatchWebByMetaSearch entry = new CatchWebByMetaSearch();

					// Publish date is parsed out of the text of all spans in the result.
					Elements originAndDate = results.get(m).select("span");
					if (originAndDate.size() > 0) {
						String publishDate = DateUtil.getPublishDate(originAndDate.text());
						entry.setPublishDate(publishDate);
					}

					// Title and source address come from the result's data-click anchor.
					Elements titleAndUrl = results.get(m).select("a[data-click]");
					if (titleAndUrl.size() > 0) {
						entry.setTitle(titleAndUrl.get(0).text().trim());
						entry.setSourceaddress(titleAndUrl.attr("href"));
					}

					entry.setOrgId(orgId);
					entry.setTid(tid);
					catchWebByMetaSearchList.add(entry);
				}
			}
			return catchWebByMetaSearchList;

		} catch (ParseException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Extracts result entries from Google search result pages.
	 *
	 * <p>Before each page the method sleeps five seconds, then obtains the page source from
	 * {@code ChromeUtil.getChromeDoc} and parses it with jsoup. Every {@code div[class=g]}
	 * element that has both an {@code h3} and an {@code a} becomes one
	 * {@link CatchWebByMetaSearch}: title from the first {@code h3}, source address from
	 * the first anchor's {@code href}, and summary set to the page URL it came from.
	 * If the result carries a date span, text containing {@code "ago"} is replaced by
	 * {@code DateUtil.getCreateDate()}; any other text goes through
	 * {@code PublishDateUtil.getPublishDate}.
	 *
	 * <p>Pages with no source are skipped; the first page without any result element
	 * ends processing of the remaining URLs.
	 *
	 * @param urlList result-page URLs to process, in order
	 * @param charset not used by this method
	 * @param orgId   value stored on every entry via {@code setOrgId}
	 * @param tid     value stored on every entry via {@code setTid}
	 * @return the collected entries, or {@code null} when any exception is caught
	 */
	@SuppressWarnings("deprecation")
	public static List<CatchWebByMetaSearch> CatchWebOfGoogle(
			List<String> urlList, String charset, Long orgId, Long tid) {
		try {
			List<CatchWebByMetaSearch> catchWebByMetaSearchList = new ArrayList<CatchWebByMetaSearch>();
			for (int i = 0; i < urlList.size(); i++) {
				// Pause five seconds before each page request.
				Thread.sleep(1000 * 5);
				String docstr = ChromeUtil.getChromeDoc(urlList.get(i));
				if (docstr == null) {
					continue;
				}
				Document doc = Jsoup.parse(docstr);
				Elements firstElementsLink = doc.select("div[class=g]");
				// A page without results ends the loop over the remaining URLs.
				if (firstElementsLink.size() == 0) {
					break;
				}
				for (int j = 0; j < firstElementsLink.size(); j++) {
					Elements headings = firstElementsLink.get(j).select("h3");
					Elements anchors = firstElementsLink.get(j).select("a");
					if (headings.size() == 0 || anchors.size() == 0) {
						// Not a regular result: skip it rather than let get(0) throw
						// and discard everything collected so far.
						continue;
					}
					// Look the date up inside THIS result; selecting over all results and
					// indexing by j misaligns dates whenever some result has none.
					Elements timespan = firstElementsLink.get(j).select("span[class=f nsa fwzPFf]");

					String title = headings.get(0).text();
					String href = anchors.get(0).attr("href");
					System.out.println(title);
					System.out.println(href);

					CatchWebByMetaSearch entry = new CatchWebByMetaSearch();
					entry.setTid(tid);
					entry.setSummary(urlList.get(i));
					entry.setOrgId(orgId);
					entry.setSourceaddress(href);
					entry.setTitle(title);
					if (timespan.size() > 0) {
						String dateText = timespan.get(0).text();
						System.out.println(dateText);
						if (dateText.contains("ago")) {
							// Relative timestamp ("... ago"): use DateUtil.getCreateDate()
							// (presumably the current time -- confirm against DateUtil).
							entry.setPublishDate(DateUtil.getCreateDate());
						} else {
							String date = PublishDateUtil.getPublishDate(dateText);
							System.out.println(date);
							entry.setPublishDate(date);
						}
					}
					catchWebByMetaSearchList.add(entry);
				}
			}
			return catchWebByMetaSearchList;

		} catch (InterruptedException e) {
			// Restore the interrupt flag so callers can still observe the interruption.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}
	/**
	 * Manual check: encodes a sample Baidu news query URL with {@code Utility.encodURI},
	 * applies the same escape clean-up used by the crawler methods, and prints the result.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		final String sampleUrl = "https://www.baidu.com/s?rtt=1&bsst=1&cl=2&tn=news&rsv_dl=ns_pc&lqst=1&x_bd_lqst=1&word=东莞振华 电池";
		String encoded = Utility.encodURI(sampleUrl);
		// Order matters: "%2520" has to be rewritten before the generic "%25" rule.
		encoded = encoded.replaceAll("%2520", "+");
		encoded = encoded.replaceAll("%25", "%");
		encoded = encoded.replaceAll("%20", "+");
		System.out.println(encoded);
	}

}
