提交 8b1d10a5 作者: 刘伟刚

标题去重更新

上级 967180fd
maxId=23042000000124
maxId=23101300001722
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -186,6 +186,18 @@
<artifactId>commons-pool2</artifactId>
<version>2.6.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
</dependencies>
<build>
......
package com.zzsn.controller;
import com.zzsn.entity.*;
import com.zzsn.service.BaseDataService;
import com.zzsn.service.impl.DataSyncServiceImpl;
import com.zzsn.utils.DateUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import java.util.*;
/**
 * REST controller mounted at {@code /basedata} that accepts a single record
 * posted through the API and stores it via {@link DataSyncServiceImpl}.
 */
@RestController
@RequestMapping("/basedata")
public class BaseDataController {

    @Autowired
    private BaseDataService baseDataService;

    @Autowired
    private DataSyncServiceImpl dataSyncService;

    /**
     * Returns a fixed greeting string.
     *
     * @return the literal {@code "hello!"}
     */
    @RequestMapping(value ="/test", method = RequestMethod.GET)
    @ResponseBody
    public String test(){
        return "hello!";
    }

    /**
     * Creates a new record from the posted payload and inserts it together with
     * its type row and any images extracted from the content.
     *
     * @param basedata the posted data object
     * @return a result whose payload is either the new record id or a failure message
     */
    @PostMapping("/save")
    public ResultModel saveOrUpdate (@RequestBody Basedatavo basedata){
        // Fixed pattern: "yyyy" is the calendar year ("YYYY" is the week-based year,
        // which is wrong around New Year) and "mm" is minutes ("MM" is the month).
        String date = DateUtil.format(new Date(), "yyyy-MM-dd HH:mm:ss");

        // Missing optional fields fall back to defaults.
        String publishDate = basedata.getPublishDate() == null ? date : basedata.getPublishDate();
        String origin = basedata.getOrigin() == null ? "" : basedata.getOrigin();
        String orientation = basedata.getOrientation() == null ? "1" : basedata.getOrientation();

        // The serial number is used as the record id.
        String bid = this.baseDataService.getserialno().toString();
        Long idl = Long.parseLong(bid);

        Basedata bsdata = new Basedata();
        bsdata.setTitle(basedata.getTitle());
        bsdata.setPublishDate(publishDate);
        bsdata.setSourceaddress(basedata.getSourceaddress());
        bsdata.setContent(basedata.getContent());
        bsdata.setContentNoTag(basedata.getContentNoTag());
        bsdata.setSummary(basedata.getSummary());
        bsdata.setOrigin(origin);
        bsdata.setOrientation(orientation);
        bsdata.setId(idl + "");
        bsdata.setSid(290622L);
        bsdata.setDelflag(2L);
        bsdata.setIsdelete(0);
        bsdata.setSourceType("news");
        bsdata.setFromWhere("接口录入");
        bsdata.setCreateDate(date);

        List<Basedata> bsDataList = new ArrayList<>();
        bsDataList.add(bsdata);

        BasedataType basedataType = new BasedataType();
        // NOTE(review): the type-row id is the serial number with every "0000" run removed;
        // confirm this cannot collide for different serial numbers.
        basedataType.setId(Long.parseLong(bid.replace("0000","")));
        basedataType.setBid(idl);
        basedataType.setTid(16865L);
        basedataType.setOrgId(3942L);
        basedataType.setPublishDate(publishDate);
        basedataType.setCreateDate(date);
        basedataType.setDelflag(2L);
        basedataType.setRelevance(0L);

        List<BasedataType> typeList = new ArrayList<>();
        typeList.add(basedataType);

        // Extract the images referenced by the content before inserting; the list passed
        // to insertBatch below is still the one built above, not the handler's result.
        Map<String, Object> imageResult = dataSyncService.imgHandler(bsDataList);
        try {
            @SuppressWarnings("unchecked") // imgHandler stores a List<BaseDataImage> under "imageList"
            List<BaseDataImage> imageList = (List<BaseDataImage>) imageResult.get("imageList");
            dataSyncService.insertBatch(bsDataList, typeList, imageList, null, null, null);
        } catch (Exception e) {
            // NOTE(review): the failure is reported through ResultModel.OK and the exception
            // is dropped; switch to an error result and log the cause once the ResultModel
            // API and a logger are available here.
            return ResultModel.OK("插入失败");
        }
        return ResultModel.OK("插入数据的id: " + bid);
    }
}
package com.zzsn.entity;
import lombok.Data;
/**
 * Request payload for the {@code /basedata/save} endpoint; its values are copied
 * into a {@code Basedata} entity by the controller. Accessors are generated by Lombok.
 */
@Data
public class Basedatavo {
// Title of the record.
String title;
// Publish date as text; the controller substitutes the current timestamp when this is null.
String publishDate;
// Source address of the record.
String sourceaddress;
// Content of the record; the controller passes it through image handling before insert.
String content;
// Content without tags — presumably plain text; verify against the caller.
String contentNoTag ;
// Summary of the record.
String summary ;
// Origin of the record; the controller substitutes an empty string when this is null.
String origin;
// Orientation value; the controller substitutes "1" when this is null.
String orientation ;
}
......@@ -17,6 +17,9 @@ public abstract interface BaseDataMapper extends BaseMapper<Basedata> {
@DS("master")
public abstract int queryByAddress(@Param("source") String paramString);
@DS("master")
public abstract int queryByTitle(@Param("title") String title);
public abstract int save(@Param("item") Basedata paramBasedata);
public abstract Basedata getContent();
......
......@@ -66,7 +66,7 @@
inner join (select t.*
from CIS_ANS_BASEDATA_TYPE T
where T.ORG_ID = 3942
) N on N.bid = B.id and b.id <![CDATA[>]]> #{maxId} order by B.ID asc
) N on N.bid = B.id and b.id <![CDATA[>]]> #{maxId} order by B.ID asc
<!-- ) N on N.bid = B.id and b.id=21122800024724 order by B.ID asc-->
</select>
<insert id="save" parameterType="com.zzsn.entity.Basedata" useGeneratedKeys="false">
......@@ -149,6 +149,14 @@
CIS_ANS_BASEDATA
WHERE sourceaddress = #{source}
</select>
<!-- Counts the rows of CIS_ANS_BASEDATA whose TITLE is exactly equal to the bound
     "title" parameter; the result is returned as an Integer. -->
<select id="queryByTitle" resultType="java.lang.Integer">
SELECT
COUNT( ID )
FROM
CIS_ANS_BASEDATA
WHERE TITLE = #{title}
</select>
<!-- Selects the CONTENT column of the single row with the hard-coded ID below and
     maps it through basedataMap.
     NOTE(review): the fixed ID looks like a debugging aid; confirm it is still needed. -->
<select id="getContent" resultMap="basedataMap">
select CONTENT from CIS_ANS_BASEDATA where ID =20092900005829
</select>
......@@ -157,4 +165,4 @@
{call p_getSerialNo(1,#{serialNo,mode=OUT,jdbcType=VARCHAR},#{e,mode=OUT,jdbcType=VARCHAR})}
]]>
</select>
</mapper>
\ No newline at end of file
</mapper>
......@@ -269,12 +269,27 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im
/**
 * Checks whether the given record is already stored, first by source address
 * and, if no match is found, by title.
 *
 * <p>A failing lookup is treated as "already exists" so that a record is never
 * inserted when duplication could not be ruled out; the failure is logged with
 * its cause instead of being silently discarded.
 *
 * @param basedata the record to look up
 * @return {@code true} if a row with the same source address or title exists,
 *         or if either lookup failed
 */
public boolean isExist(Basedata basedata){
    logger.info("查询网址是否已经存在科工局数据库中{}",basedata.getId());

    int addressMatches;
    try {
        addressMatches = dataMapper.queryByAddress(basedata.getSourceaddress());
    } catch (Exception e) {
        // Fail closed: without an answer from the database the record is skipped.
        logger.error("按网址查询是否存在时出错,按已存在处理{}", basedata.getSourceaddress(), e);
        return true;
    }
    if (addressMatches > 0) {
        logger.info("该网址已经存在科工局数据库中{}",basedata.getSourceaddress());
        return true;
    }

    int titleMatches;
    try {
        titleMatches = dataMapper.queryByTitle(basedata.getTitle());
    } catch (Exception e) {
        // Same fail-closed policy as for the address lookup.
        logger.error("按标题查询是否存在时出错,按已存在处理{}", basedata.getTitle(), e);
        return true;
    }
    if (titleMatches > 0) {
        logger.info("该新闻已经存在科工局数据库中{}",basedata.getTitle());
        return true;
    }
    return false;
}
......@@ -289,7 +304,11 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im
for (Basedata basedata : data) {
Map<String, FileTag> contentFileTag = null;
try{
contentFileTag =ContentFileFinder.getContentFileTag(basedata.getContent(), basedata.getSourceaddress());
try {
contentFileTag = ContentFileFinder.getContentFileTag(basedata.getContent(), basedata.getSourceaddress());
}catch (Exception e){
result.add(basedata);
}
if(contentFileTag==null || contentFileTag.size()<1){
result.add(basedata);
continue;
......@@ -498,14 +517,15 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im
/**
* 对content中的视频进行处理
* @param data
* @param map1
* @return
*/
public Map<String, Object> videoHandler(List<Basedata> data) {
logger.info("对content中的视频图片进行处理:{}",data);
Map<String, Object> map = new HashMap<>();
public Map<String, Object> videoHandler(Map<String, Object> map) {
List<Basedata> data=(List<Basedata>)map.get("basedata");
logger.info("对content中的视频图片进行处理:{}",data.size());
// Map<String, Object> map = new HashMap<>();
List<Basedata> result = new ArrayList<>();
List<BaseDataImage> imageList =new ArrayList<>();
List<BaseDataImage> videoList =new ArrayList<>();
if(data!=null&&data.size()>0){
for (Basedata basedata : data) {
Map<String, FileTag> contentFileTag = null;
......@@ -515,40 +535,71 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im
result.add(basedata);
continue;
}
Basedata base = new Basedata();
BeanUtils.copyProperties(basedata,base);
//遍历map
Document document = Jsoup.parse(basedata.getContent());
for (String key:contentFileTag.keySet()){
FileTag value = contentFileTag.get(key);
//使用MD5加密:ID+图片的原网址
String md5 = DigestUtils.md5DigestAsHex((value.getAbsolutePath()).getBytes(StandardCharsets.UTF_8));
System.out.println("加密后的md5值{}"+md5);
String savePath = videoPath;
String id = basedata.getId()+"";
String url = value.getAbsolutePath();
String referer = basedata.getSourceaddress();
String path = savePath;
String vname = md5 + ".mp4";
ImgBaseUtil.sendDownloadInfo(id, url, referer, path, vname);
//保存图片到本地,本地路径文件夹
Document keyDoc = Jsoup.parse(key);
String src = keyDoc.select("video").attr("src");
Elements elements = document.select("*[src=\"" + src + "\"]");
for (Element e : elements) {
e.attr("src","/static/video/" + vname );
try {
Basedata base = new Basedata();
BeanUtils.copyProperties(basedata, base);
logger.info("对处理数据的标题:{}",base.getTitle());
//遍历map
Document document = Jsoup.parse(basedata.getContent());
for (String key : contentFileTag.keySet()) {
FileTag value = contentFileTag.get(key);
//使用MD5加密:ID+图片的原网址
String md5 = DigestUtils.md5DigestAsHex((value.getAbsolutePath()).getBytes(StandardCharsets.UTF_8));
logger.info("加密后的md5值{}" + md5);
String savePath = videoPath;
String id = basedata.getId() + "";
String url = value.getAbsolutePath();
String referer = basedata.getSourceaddress();
String path = savePath;
String vname = md5 + ".mp4";
ImgBaseUtil.sendDownloadInfo(id, url, referer, path, vname);
//保存图片到本地,本地路径文件夹
Document keyDoc = Jsoup.parse(key);
String src = keyDoc.select("video").attr("src");
Elements elements = document.select("*[src=\"" + src + "\"]");
for (Element e : elements) {
e.attr("src", "/video/" + vname);
}
BaseDataImage baseDataImage = new BaseDataImage();
baseDataImage.setId(md5);
baseDataImage.setDataImage(vname);
baseDataImage.setBid(base.getId());
//暂不替换原content中的内容
if(StringUtils.isNotBlank(vname)){
logger.info("替换[{}]中的原图片[{}]为[{}]",base.getId(),key,md5);
System.out.println("视频tihuanchenggong");
videoList.add(baseDataImage);
base.setContent(base.getContent().replace(key,md5));
}else{
System.out.println("视频weipaqu");
base.setContent(base.getContent().replace(key,""));
}
}
base.setContent(document.outerHtml());
result.add(base);
}catch (Exception e){
result.add(basedata);
logger.info("视频处理解析失败");
}
base.setContent(document.outerHtml());
result.add(base);
}catch (Exception e){
logger.info("[{}]提取img报错:[{}]",basedata.getId(),e.getMessage());
result.add(basedata);
continue;
}
}
}
map.put("basedata",result);
map.put("imageList",imageList);
List<BaseDataImage> imageList=new ArrayList<>();
try {
imageList = (List<BaseDataImage>) map.get("imageList");
if (videoList.size() > 0) {
imageList.addAll(videoList);
}
map.put("basedata", result);
map.put("imageList", imageList);
}catch (Exception e){
map.put("basedata", result);
map.put("imageList", imageList);
}
return map;
}
......
......@@ -18,35 +18,35 @@ import org.jsoup.select.Elements;
/**
* 获取正文中的图片或者文件
* 创建人:李东亮
* 创建时间:2016-8-30 下午5:25:04
* 创建人:李东亮
* 创建时间:2016-8-30 下午5:25:04
* 公司 :郑州数能软件科技有限公司
* @version 1.0
* @version 1.0
*
*/
public class ContentFileFinder {
/**
 * Returns the parent part of a slash-separated path, i.e. everything before
 * the last {@code '/'}.
 *
 * <p>A path that contains no {@code '/'} has no parent part and is returned
 * unchanged (previously this case threw {@code StringIndexOutOfBoundsException}).
 *
 * @param path the path or URL to shorten; must not be {@code null}
 * @return the text before the last {@code '/'}, or {@code path} itself when it
 *         contains no {@code '/'}
 */
public static String getDirPath(String path) {
    int lastSlash = path.lastIndexOf('/');
    if (lastSlash < 0) {
        return path;
    }
    return path.substring(0, lastSlash);
}
/**
* 去除路径中的./
* 创建人: 李东亮
* 创建时间: 2015-7-6 下午3:43:00
* 创建人: 李东亮
* 创建时间: 2015-7-6 下午3:43:00
* @version 1.0
* @param currentPageURL,imgPath
* @return
* @throws IOException
* @throws IOException
*/
public static String formatPath(String currentPageURL,String imgPath) {
String start="";
......@@ -59,7 +59,7 @@ public class ContentFileFinder {
if(imgPath.startsWith("/")){
//add lihuawei 增加双斜杠判断图片 如果开始时双斜杠就增加http:
if(imgPath.startsWith("//")){
return start+imgPath.replace("//", "");
}
currentPageURL = currentPageURL.replace(start, "");
......@@ -70,10 +70,10 @@ public class ContentFileFinder {
String domain = currentPageURL.substring(0, subIndex);
return start+domain+imgPath;
}
//相对路径
String path = currentPageURL+"/"+imgPath;
path = path.replaceAll(start, "D:/");
File f = new File(path);
String filePath="";
......@@ -98,7 +98,7 @@ public class ContentFileFinder {
if(imgPath.startsWith("/")){
//add lihuawei 增加双斜杠判断图片 如果开始时双斜杠就增加http:
if(imgPath.startsWith("//")){
return start+imgPath.replace("//", "");
}
currentPageURL = currentPageURL.replace(start, "");
......@@ -109,10 +109,10 @@ public class ContentFileFinder {
String domain = currentPageURL.substring(0, subIndex);
return start+domain+imgPath;
}
//相对路径
String path = currentPageURL+"/"+imgPath;
path = path.replaceAll(start, "D:/");
File f = new File(path);
String filePath="";
......@@ -126,11 +126,11 @@ public class ContentFileFinder {
result = result.replaceAll("\\\\", "/");
return result;
}
/**
* 生成图片文件保存路径
* 创建人: 李东亮
* 创建时间: 2016-3-23 下午2:50:33
* 创建人: 李东亮
* 创建时间: 2016-3-23 下午2:50:33
* @version 1.0
* @return
*/
......@@ -139,11 +139,11 @@ public class ContentFileFinder {
String uuid = UUID.randomUUID().toString();
return dir+"/"+uuid+suffix;
}
/**
* 确保有src属性并且src属性指向正确的图片地址
* 创建人: 李东亮
* 创建时间: 2016-6-6 下午1:46:03
* 创建人: 李东亮
* 创建时间: 2016-6-6 下午1:46:03
* @version 1.0
* @param imgTag
* @return
......@@ -166,11 +166,11 @@ public class ContentFileFinder {
imgTag.attr("src", imgTag.attr(firstSrcAtt));
return imgTag;
}
/**
* 获取图片的绝对路径
* 创建人: 李东亮
* 创建时间: 2016-6-6 下午2:05:02
* 创建人: 李东亮
* 创建时间: 2016-6-6 下午2:05:02
* @version 1.0
* @param element
* @param uri
......@@ -194,36 +194,49 @@ public class ContentFileFinder {
//add lihuawei jsoup有图片补全功能直接使用,这样拿到的链接都是正确的
element.setBaseUri(uri);
String absolutePath1= element.absUrl(linkAtt);
//String puriDir = getDirPath(uri);
//absolutePath = formatPath(puriDir,absolutePath1);
return absolutePath1;
}
return absolutePath;
}
/** Matches a trailing {@code .ext} (1-4 word characters) that follows a path segment. */
private static final Pattern TRAILING_SUFFIX = Pattern.compile("/.+(\\.\\w{1,4})$");

/** Image extensions looked for anywhere in the URI; a later entry overrides an earlier one. */
private static final String[] IMAGE_SUFFIXES = {".jpg", ".png", ".jpeg", ".gif"};

/**
 * Determines the file suffix (including the leading dot) of a URI.
 *
 * <p>The suffix is first taken from the end of the URI. If the URI contains one
 * of the known image extensions anywhere (for example before a query string),
 * that extension takes precedence.
 *
 * @param uri the URI to inspect; {@code null} is treated as having no suffix
 * @return the suffix such as {@code ".jpg"}, or an empty string when none is found
 */
public static String getSuffix(String uri){
    if (uri == null) {
        return "";
    }
    String suffix = "";
    String withoutScheme = uri.replaceAll("http://|https://", "");

    Matcher matcher = TRAILING_SUFFIX.matcher(withoutScheme);
    if (matcher.find()) {
        suffix = matcher.group(1);
    }
    // Checked in a fixed order so the last matching extension wins.
    for (String imageSuffix : IMAGE_SUFFIXES) {
        if (withoutScheme.contains(imageSuffix)) {
            suffix = imageSuffix;
        }
    }
    return suffix;
}
/**
* 获取正文中的文件标签,包含正文中的图片和附件
* 创建人: 李东亮
* 创建时间: 2016-9-8 下午3:01:09
* 创建人: 李东亮
* 创建时间: 2016-9-8 下午3:01:09
* @version 1.0
* @param content
* @param sourceaddress
......@@ -271,12 +284,13 @@ public class ContentFileFinder {
fileTag.setAbsoluteTag(imgTag.outerHtml());
//图片保存路径
suffix = ContentFileFinder.getSuffix(absolutePath);
fileTag.setSuffix(suffix.substring(1));
// if(StringUtils.isNotBlank(suffix)){
// fileTag.setSuffix(suffix.substring(1));
// }else{
// fileTag.setSuffix("");
// }
// fileTag.setSuffix(suffix.substring(1));
if(StringUtils.isNotBlank(suffix)){
fileTag.setSuffix(suffix.substring(1));
}else{
fileTag.setSuffix("");
continue;
}
savePath = genImgFileName(suffix);
fileTag.setSavePath(savePath);
//图片保存标签
......
......@@ -514,12 +514,18 @@ public class ImgBaseUtil {
* @param vname 下载文件名
*/
public static void sendDownloadInfo(String id,String url,String referer,String path,String vname ){
    // Build the aria2.addUri JSON-RPC request once. Every value is escaped because the
    // URL and referer come from scraped pages and the directory may contain backslashes.
    String param = "{\"jsonrpc\": \"2.0\", \"id\": \"" + escapeJson(id) + "\", \"method\": \"aria2.addUri\","
            + "\"params\": [[\"" + escapeJson(url) + "\"], {\"referer\":\"" + escapeJson(referer)
            + "\" , \"dir\": \"" + escapeJson(path) + "\",\"out\":\"" + escapeJson(vname) + "\"}]}";

    // Retry only on failure: returning after the first successful send keeps the same
    // download from being queued several times.
    final int maxAttempts = 3;
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            sendPost_body("http://localhost:6800/jsonrpc", param);
            return;
        } catch (Exception e) {
            // Best effort: the caller does not expect an exception, so the last failure
            // is only reported.
            if (attempt == maxAttempts) {
                System.err.println("sendDownloadInfo failed after " + maxAttempts
                        + " attempts for " + url + ": " + e);
            }
        }
    }
}

/**
 * Escapes a value for use inside a double-quoted JSON string.
 *
 * @param value the raw value; {@code null} is rendered as the text {@code null}
 * @return the value with quotes, backslashes and control characters escaped
 */
private static String escapeJson(String value) {
    String text = String.valueOf(value);
    StringBuilder escaped = new StringBuilder(text.length() + 8);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '"':
                escaped.append("\\\"");
                break;
            case '\\':
                escaped.append("\\\\");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\t':
                escaped.append("\\t");
                break;
            default:
                if (c < 0x20) {
                    escaped.append(String.format("\\u%04x", (int) c));
                } else {
                    escaped.append(c);
                }
        }
    }
    return escaped.toString();
}
public static String sendPost_body(String arl, String s) {
public static String sendPost_body(String arl, String s) {
// 创建url资源
OutputStreamWriter out = null;
URL url;
......
......@@ -43,8 +43,8 @@ spring:
# 多数据源配置
master:
url: jdbc:oracle:thin:@localhost:1521:orcl
username: cis1
password: cis1
username: cistest
password: cistest
driver-class-name: oracle.jdbc.driver.OracleDriver
# slave:
# url: jdbc:oracle:thin:@localhost:1521:orcl
......@@ -163,3 +163,6 @@ proxy:
video:
path: /home/ubuntu/video/
excelpath: C:\\Users\\WIN10\\Desktop\\测试数据库环境\\22222.xls
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论