package com.zzsn.paser;

import com.zzsn.cache.MemcachedUtils;
import com.zzsn.docinfo.DocInfo;
import com.zzsn.entity.Site;
import com.zzsn.entity.SiteTemplate;
import com.zzsn.search.extractor.ContentFileFinder;
import com.zzsn.search.extractor.DefaultMsg;
import com.zzsn.utility.model.CatchWebByMetaSearch;
import com.zzsn.utility.util.DateUtil;
import com.zzsn.utility.util.Utility;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Template-driven extraction of article fields from HTML.
 *
 * <p>A {@link SiteTemplate} carries one CSS selector per field (title, summary,
 * content, author, origin, publish date). The selectors are looked up per host
 * from the cache and applied to a parsed jsoup {@link Document} to fill a
 * {@link DocInfo}.
 */
public class SourceTemplateByTag {

    /**
     * Sites for which no usable template was found, as last written to the
     * cache under the key {@code "tempSite"} by {@link #saveNoTempSite}.
     */
    static List<Site> sList = new ArrayList<Site>();

    /**
     * Builds the extraction template for the host of the given URL from the
     * {@link Site} cached under the key {@code "domainUri_" + host}.
     *
     * <p>Only selectors that are non-null on the cached site are copied onto
     * the returned template.
     *
     * @param infourl absolute URL of the page to be parsed
     * @return the template for the URL's host, or {@code null} when the URL
     *         cannot be parsed, no {@link Site} is cached for the host, or the
     *         cache lookup fails
     */
    public SiteTemplate getSiteTemp(String infourl) {
        try {
            String domain = new URL(infourl).getHost();
            Object cached = MemcachedUtils.get("domainUri_" + domain);
            if (!(cached instanceof Site)) {
                // Missing (or unexpected) cache entry: there is no template for this host.
                return null;
            }
            Site site = (Site) cached;

            SiteTemplate sTemplate = new SiteTemplate();
            if (null != site.getMatchTitle()) {
                sTemplate.setMatchTitle(site.getMatchTitle());
            }
            if (null != site.getMatchSummary()) {
                sTemplate.setMatchSummary(site.getMatchSummary());
            }
            if (null != site.getMatchContent()) {
                sTemplate.setMatchContent(site.getMatchContent());
            }
            // Guard on the author selector itself (this used to test the title selector).
            if (null != site.getMatchAuthor()) {
                sTemplate.setMatchAuthor(site.getMatchAuthor());
            }
            if (null != site.getMatchOrigin()) {
                sTemplate.setMatchOrigin(site.getMatchOrigin());
            }
            if (null != site.getMatchPublishDate()) {
                sTemplate.setMatchPublishDate(site.getMatchPublishDate());
            }
            return sTemplate;
        } catch (Exception e) {
            // Malformed URL or cache failure: callers treat null as "no template".
            return null;
        }
    }

    /**
     * Records a problematic site (one without a working template) in the cache
     * under the key {@code "tempSite"}.
     *
     * <p>The list already stored in the cache, if any, is read back and the new
     * site is appended to it, so entries written earlier are not overwritten.
     * The entry is stored with an expiry argument of {@code 60*60*24}
     * (presumably seconds, i.e. one day; confirm against {@code MemcachedUtils}).
     * Failures are reported on standard output and are not propagated.
     *
     * @param cwbm search result whose source address and source site name
     *             describe the site to record
     */
    public static void saveNoTempSite(CatchWebByMetaSearch cwbm) {
        try {
            String infourl = cwbm.getSourceaddress();
            String domainurl = new URL(infourl).getHost();
            Site site = new Site();
            site.setDomainUri(domainurl);
            site.setUri(infourl);
            site.setName(cwbm.getSourcesite());

            // Start from what the cache currently holds rather than from the
            // JVM-local list, which is empty after a restart.
            Object cacheObj = MemcachedUtils.get("tempSite");
            List<Site> sites = new ArrayList<Site>();
            if (cacheObj instanceof List) {
                for (Object item : (List<?>) cacheObj) {
                    if (item instanceof Site) {
                        sites.add((Site) item);
                    }
                }
            }
            sites.add(site);
            sList = sites;
            MemcachedUtils.set("tempSite", sList, 60 * 60 * 24);
            System.out.println("保存成功！！");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("保存失败");
        }
    }

    /**
     * Fills {@code docInfo} with the fields selected by {@code siteTemplate}
     * from the given HTML.
     *
     * <p>Each selector that is null or empty is skipped. If a content selector
     * is configured but yields no content after tag clean-up, the method
     * returns immediately: the title (if any) has been set, but author,
     * publish date, summary and origin are not extracted.
     *
     * @param htmlContent  raw HTML of the page
     * @param docInfo      document to populate; returned for convenience
     * @param siteTemplate CSS selectors to apply
     * @return {@code docInfo}; returned unchanged when any argument is
     *         {@code null}
     */
    public static DocInfo doPaserByTag(String htmlContent, DocInfo docInfo, SiteTemplate siteTemplate) {
        if (htmlContent == null || docInfo == null || siteTemplate == null) {
            // getSiteTemp may return null; nothing can be extracted without a template.
            return docInfo;
        }
        Document doc = Jsoup.parse(htmlContent);
        System.out.println("===========doPaserByTag");

        // Title
        if (hasSelector(siteTemplate.getMatchTitle())) {
            String title = paseElementByCSS(doc, siteTemplate.getMatchTitle());
            if (StringUtils.isNotEmpty(title)) {
                docInfo.setTitle(title.replace("...", ""));
            }
        }

        // Content (kept both with and without markup)
        if (hasSelector(siteTemplate.getMatchContent())) {
            Elements elementsByTag = doc.select(siteTemplate.getMatchContent());
            String contentWithTag = Utility.RemoveUselessHTMLTagX(elementsByTag.html());
            System.out.println("===========" + elementsByTag);
            if (contentWithTag == null || contentWithTag.trim().length() == 0) {
                return docInfo;
            }
            docInfo.setContentWithTag(ContentFileFinder.rmHtmlImgOrAtag(contentWithTag));
            docInfo.setContentNoTag(Utility.TransferHTML2Text(contentWithTag).replaceAll("\\n", ""));
        }

        // Author
        if (hasSelector(siteTemplate.getMatchAuthor())) {
            String author = paseElementByCSS(doc, siteTemplate.getMatchAuthor());
            if (author.length() > 0) {
                docInfo.setAuthor(author);
            }
        }

        // Publish date
        if (hasSelector(siteTemplate.getMatchPublishDate())) {
            String publishDate = paseElementByCSS(doc, siteTemplate.getMatchPublishDate());
            if (publishDate.length() > 0) {
                docInfo.setPublishDate(DateUtil.getPublishDate(publishDate));
            }
        }

        // Summary
        if (hasSelector(siteTemplate.getMatchSummary())) {
            String summary = paseElementByCSS(doc, siteTemplate.getMatchSummary());
            if (summary.length() > 0) {
                docInfo.setSummary(summary);
            }
        }

        // Origin
        if (hasSelector(siteTemplate.getMatchOrigin())) {
            String origin = paseElementByCSS(doc, siteTemplate.getMatchOrigin());
            if (origin.length() > 0) {
                docInfo.setOrigin(origin);
            }
        }
        return docInfo;
    }

    /**
     * Returns the trimmed text of the first element matching a CSS selector.
     *
     * @param doc parsed document to search
     * @param tag CSS selector
     * @return the first match's text, or an empty string (never {@code null})
     *         when nothing matches or the selection throws an exception
     */
    public static String paseElementByCSS(Document doc, String tag) {
        String msg = "";
        try {
            Elements elements = doc.select(tag);
            if (elements.size() > 0) {
                msg = elements.get(0).text().trim();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Returned after the try/catch, not from a finally block, so that
        // Errors are not silently discarded.
        return msg;
    }

    /** Tells whether a template selector is configured, i.e. non-null and non-empty. */
    private static boolean hasSelector(String selector) {
        return selector != null && selector.length() > 0;
    }
}
