提交 8b1d10a5 作者: 刘伟刚

标题去重更新

上级 967180fd
maxId=23042000000124 maxId=23101300001722
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -186,6 +186,18 @@ ...@@ -186,6 +186,18 @@
<artifactId>commons-pool2</artifactId> <artifactId>commons-pool2</artifactId>
<version>2.6.2</version> <version>2.6.2</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
</dependencies> </dependencies>
<build> <build>
......
package com.zzsn.controller;
import com.zzsn.entity.*;
import com.zzsn.service.BaseDataService;
import com.zzsn.service.impl.DataSyncServiceImpl;
import com.zzsn.utils.DateUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import java.util.*;
/**
 * REST endpoints under {@code /basedata} for inserting a single news record
 * supplied by an external caller.
 */
@RestController
@RequestMapping("/basedata")
public class BaseDataController {

    /**
     * Timestamp pattern: calendar year, month, day, 24h hour, minutes, seconds.
     * The previous pattern "YYYY-MM-dd HH:MM:ss" used the week-based year (YYYY)
     * and repeated the month (MM) where minutes (mm) belong.
     * NOTE(review): assumes DateUtil.format follows SimpleDateFormat pattern letters - confirm.
     */
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss";

    // java.util.logging is referenced by its fully-qualified name so that no new import is needed.
    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(BaseDataController.class.getName());

    @Autowired
    private BaseDataService baseDataService;

    @Autowired
    private DataSyncServiceImpl dataSyncService;

    /**
     * Simple liveness probe.
     *
     * @return the fixed string {@code "hello!"}
     */
    @RequestMapping(value = "/test", method = RequestMethod.GET)
    @ResponseBody
    public String test() {
        return "hello!";
    }

    /**
     * Builds one {@code Basedata} row and one {@code BasedataType} row from the
     * request body and inserts them through {@code DataSyncServiceImpl.insertBatch}.
     *
     * <p>Defaults applied when the request omits a value: {@code publishDate} falls back to
     * the current timestamp, {@code origin} to the empty string and {@code orientation} to "1".
     *
     * @param basedata request payload
     * @return a result carrying either the new record id or the failure text
     */
    @PostMapping("/save")
    public ResultModel saveOrUpdate(@RequestBody Basedatavo basedata) {
        String date = DateUtil.format(new Date(), TIMESTAMP_PATTERN);
        String publishDate = basedata.getPublishDate() == null ? date : basedata.getPublishDate();
        String origin = basedata.getOrigin() == null ? "" : basedata.getOrigin();
        String orientation = basedata.getOrientation() == null ? "1" : basedata.getOrientation();

        String bid = this.baseDataService.getserialno().toString();
        long idl = Long.parseLong(bid);

        Basedata bsdata = new Basedata();
        bsdata.setTitle(basedata.getTitle());
        bsdata.setPublishDate(publishDate);
        bsdata.setSourceaddress(basedata.getSourceaddress());
        bsdata.setContent(basedata.getContent());
        bsdata.setContentNoTag(basedata.getContentNoTag());
        bsdata.setSummary(basedata.getSummary());
        bsdata.setOrigin(origin);
        bsdata.setOrientation(orientation);
        bsdata.setId(String.valueOf(idl));
        bsdata.setSid(290622L);
        bsdata.setDelflag(2L);
        bsdata.setIsdelete(0);
        bsdata.setSourceType("news");
        bsdata.setFromWhere("接口录入");
        bsdata.setCreateDate(date);
        List<Basedata> bsDataList = new ArrayList<>();
        bsDataList.add(bsdata);

        BasedataType basedataType = new BasedataType();
        // NOTE(review): the type-row id is the serial number with every "0000" run removed;
        // confirm this cannot collide for different serial numbers.
        basedataType.setId(Long.parseLong(bid.replace("0000", "")));
        basedataType.setBid(idl);
        basedataType.setTid(16865L);
        basedataType.setOrgId(3942L);
        basedataType.setPublishDate(publishDate);
        basedataType.setCreateDate(date);
        basedataType.setDelflag(2L);
        basedataType.setRelevance(0L);
        List<BasedataType> typeList = new ArrayList<>();
        typeList.add(basedataType);

        // Run the image handler before inserting. The original (unmodified) list is what gets
        // inserted; only the handler's "imageList" entry is taken from its result.
        Map<String, Object> imageResult = dataSyncService.imgHandler(bsDataList);
        @SuppressWarnings("unchecked")
        List<BaseDataImage> imageList = (List<BaseDataImage>) imageResult.get("imageList");

        try {
            // The three web-source lists are not produced on this path, so null is passed.
            dataSyncService.insertBatch(bsDataList, typeList, imageList, null, null, null);
        } catch (Exception e) {
            // Record the cause; previously the exception vanished without a trace.
            LOG.log(java.util.logging.Level.SEVERE, "insertBatch failed for id " + bid, e);
            // NOTE(review): a failure is still reported through ResultModel.OK because no
            // error factory of ResultModel is visible here - switch to one if it exists.
            return ResultModel.OK("插入失败");
        }
        return ResultModel.OK("插入数据的id: " + bid);
    }
}
package com.zzsn.entity;
import lombok.Data;
/**
 * Request payload for {@code POST /basedata/save} (bound via {@code @RequestBody} in
 * {@code BaseDataController.saveOrUpdate}). Accessors are generated by Lombok's {@code @Data}.
 * All fields are optional strings; the controller copies them onto a {@code Basedata} row.
 */
@Data
public class Basedatavo {
// Copied to Basedata.title.
String title;
// Copied to Basedata.publishDate; the controller substitutes the current timestamp when null.
String publishDate;
// Copied to Basedata.sourceaddress.
String sourceaddress;
// Copied to Basedata.content. Presumably HTML, since the controller runs the image handler on it - confirm.
String content;
// Copied to Basedata.contentNoTag.
String contentNoTag ;
// Copied to Basedata.summary.
String summary ;
// Copied to Basedata.origin; the controller substitutes "" when null.
String origin;
// Copied to Basedata.orientation; the controller substitutes "1" when null.
String orientation ;
}
...@@ -17,6 +17,9 @@ public abstract interface BaseDataMapper extends BaseMapper<Basedata> { ...@@ -17,6 +17,9 @@ public abstract interface BaseDataMapper extends BaseMapper<Basedata> {
@DS("master") @DS("master")
public abstract int queryByAddress(@Param("source") String paramString); public abstract int queryByAddress(@Param("source") String paramString);
/**
 * Counts the rows of CIS_ANS_BASEDATA whose TITLE equals the given title
 * (exact match, as defined by the {@code queryByTitle} statement in the mapper XML).
 * NOTE(review): {@code @DS("master")} presumably selects the "master" datasource - confirm.
 *
 * @param title title to look up
 * @return number of matching rows
 */
@DS("master")
public abstract int queryByTitle(@Param("title") String title);
public abstract int save(@Param("item") Basedata paramBasedata); public abstract int save(@Param("item") Basedata paramBasedata);
public abstract Basedata getContent(); public abstract Basedata getContent();
......
...@@ -149,6 +149,14 @@ ...@@ -149,6 +149,14 @@
CIS_ANS_BASEDATA CIS_ANS_BASEDATA
WHERE sourceaddress = #{source} WHERE sourceaddress = #{source}
</select> </select>
<!-- Counts rows in CIS_ANS_BASEDATA whose TITLE exactly equals the bound "title" parameter. -->
<select id="queryByTitle" resultType="java.lang.Integer">
SELECT
COUNT( ID )
FROM
CIS_ANS_BASEDATA
WHERE TITLE = #{title}
</select>
<select id="getContent" resultMap="basedataMap"> <select id="getContent" resultMap="basedataMap">
select CONTENT from CIS_ANS_BASEDATA where ID =20092900005829 select CONTENT from CIS_ANS_BASEDATA where ID =20092900005829
</select> </select>
......
...@@ -269,12 +269,27 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im ...@@ -269,12 +269,27 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im
public boolean isExist(Basedata basedata){ public boolean isExist(Basedata basedata){
// logger.info("查询网址是否已经存在科工局数据库中{}",basedata); // logger.info("查询网址是否已经存在科工局数据库中{}",basedata);
logger.info("查询网址是否已经存在科工局数据库中{}",basedata.getId()); logger.info("查询网址是否已经存在科工局数据库中{}",basedata.getId());
int i = dataMapper.queryByAddress(basedata.getSourceaddress()); int i=0;
try {
i = dataMapper.queryByAddress(basedata.getSourceaddress());
}catch (Exception e){
i=1;
}
// QueryWrapper<Basedata> queryWrapper = new QueryWrapper<>(); // QueryWrapper<Basedata> queryWrapper = new QueryWrapper<>();
// queryWrapper.eq("sourceaddress",basedata.getSourceaddress()); // queryWrapper.eq("sourceaddress",basedata.getSourceaddress());
if(i>0){ if(i>0){
// logger.info("该网址已经存在科工局数据库中{}",basedata.getId()); // logger.info("该网址已经存在科工局数据库中{}",basedata.getId());
logger.info("该网址已经存在科工局数据库中{}",basedata.getSourceaddress()); logger.info("该网址已经存在科工局数据库中{}",basedata.getSourceaddress());
}else {
try {
i = dataMapper.queryByTitle(basedata.getTitle());
} catch (Exception e) {
i=1;
}
if(i>0){
logger.info("该新闻已经存在科工局数据库中{}",basedata.getTitle());
}
} }
return i>0; return i>0;
} }
...@@ -289,7 +304,11 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im ...@@ -289,7 +304,11 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im
for (Basedata basedata : data) { for (Basedata basedata : data) {
Map<String, FileTag> contentFileTag = null; Map<String, FileTag> contentFileTag = null;
try{ try{
contentFileTag =ContentFileFinder.getContentFileTag(basedata.getContent(), basedata.getSourceaddress()); try {
contentFileTag = ContentFileFinder.getContentFileTag(basedata.getContent(), basedata.getSourceaddress());
}catch (Exception e){
result.add(basedata);
}
if(contentFileTag==null || contentFileTag.size()<1){ if(contentFileTag==null || contentFileTag.size()<1){
result.add(basedata); result.add(basedata);
continue; continue;
...@@ -498,14 +517,15 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im ...@@ -498,14 +517,15 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im
/** /**
* 对content中的视频进行处理 * 对content中的视频进行处理
* @param data * @param map1
* @return * @return
*/ */
public Map<String, Object> videoHandler(List<Basedata> data) { public Map<String, Object> videoHandler(Map<String, Object> map) {
logger.info("对content中的视频图片进行处理:{}",data); List<Basedata> data=(List<Basedata>)map.get("basedata");
Map<String, Object> map = new HashMap<>(); logger.info("对content中的视频图片进行处理:{}",data.size());
// Map<String, Object> map = new HashMap<>();
List<Basedata> result = new ArrayList<>(); List<Basedata> result = new ArrayList<>();
List<BaseDataImage> imageList =new ArrayList<>(); List<BaseDataImage> videoList =new ArrayList<>();
if(data!=null&&data.size()>0){ if(data!=null&&data.size()>0){
for (Basedata basedata : data) { for (Basedata basedata : data) {
Map<String, FileTag> contentFileTag = null; Map<String, FileTag> contentFileTag = null;
...@@ -515,17 +535,19 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im ...@@ -515,17 +535,19 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im
result.add(basedata); result.add(basedata);
continue; continue;
} }
try {
Basedata base = new Basedata(); Basedata base = new Basedata();
BeanUtils.copyProperties(basedata,base); BeanUtils.copyProperties(basedata, base);
logger.info("对处理数据的标题:{}",base.getTitle());
//遍历map //遍历map
Document document = Jsoup.parse(basedata.getContent()); Document document = Jsoup.parse(basedata.getContent());
for (String key:contentFileTag.keySet()){ for (String key : contentFileTag.keySet()) {
FileTag value = contentFileTag.get(key); FileTag value = contentFileTag.get(key);
//使用MD5加密:ID+图片的原网址 //使用MD5加密:ID+图片的原网址
String md5 = DigestUtils.md5DigestAsHex((value.getAbsolutePath()).getBytes(StandardCharsets.UTF_8)); String md5 = DigestUtils.md5DigestAsHex((value.getAbsolutePath()).getBytes(StandardCharsets.UTF_8));
System.out.println("加密后的md5值{}"+md5); logger.info("加密后的md5值{}" + md5);
String savePath = videoPath; String savePath = videoPath;
String id = basedata.getId()+""; String id = basedata.getId() + "";
String url = value.getAbsolutePath(); String url = value.getAbsolutePath();
String referer = basedata.getSourceaddress(); String referer = basedata.getSourceaddress();
String path = savePath; String path = savePath;
...@@ -536,19 +558,48 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im ...@@ -536,19 +558,48 @@ public class DataSyncServiceImpl extends ServiceImpl<BaseDataMapper,Basedata> im
String src = keyDoc.select("video").attr("src"); String src = keyDoc.select("video").attr("src");
Elements elements = document.select("*[src=\"" + src + "\"]"); Elements elements = document.select("*[src=\"" + src + "\"]");
for (Element e : elements) { for (Element e : elements) {
e.attr("src","/static/video/" + vname ); e.attr("src", "/video/" + vname);
}
BaseDataImage baseDataImage = new BaseDataImage();
baseDataImage.setId(md5);
baseDataImage.setDataImage(vname);
baseDataImage.setBid(base.getId());
//暂不替换原content中的内容
if(StringUtils.isNotBlank(vname)){
logger.info("替换[{}]中的原图片[{}]为[{}]",base.getId(),key,md5);
System.out.println("视频tihuanchenggong");
videoList.add(baseDataImage);
base.setContent(base.getContent().replace(key,md5));
}else{
System.out.println("视频weipaqu");
base.setContent(base.getContent().replace(key,""));
} }
} }
base.setContent(document.outerHtml()); base.setContent(document.outerHtml());
result.add(base); result.add(base);
}catch (Exception e){ }catch (Exception e){
result.add(basedata);
logger.info("视频处理解析失败");
}
}catch (Exception e){
logger.info("[{}]提取img报错:[{}]",basedata.getId(),e.getMessage()); logger.info("[{}]提取img报错:[{}]",basedata.getId(),e.getMessage());
result.add(basedata);
continue; continue;
} }
} }
} }
map.put("basedata",result); List<BaseDataImage> imageList=new ArrayList<>();
map.put("imageList",imageList); try {
imageList = (List<BaseDataImage>) map.get("imageList");
if (videoList.size() > 0) {
imageList.addAll(videoList);
}
map.put("basedata", result);
map.put("imageList", imageList);
}catch (Exception e){
map.put("basedata", result);
map.put("imageList", imageList);
}
return map; return map;
} }
......
...@@ -211,13 +211,26 @@ public class ContentFileFinder { ...@@ -211,13 +211,26 @@ public class ContentFileFinder {
* @return * @return
*/ */
public static String getSuffix(String uri){ public static String getSuffix(String uri){
String suffix="";
uri = uri.replaceAll("http://|https://", ""); uri = uri.replaceAll("http://|https://", "");
Pattern p = Pattern.compile("/.+(\\.\\w{1,4})$"); Pattern p = Pattern.compile("/.+(\\.\\w{1,4})$");
Matcher m = p.matcher(uri); Matcher m = p.matcher(uri);
if(m.find()){ if(m.find()){
return m.group(1); suffix= m.group(1);
} }
return ""; if(uri.contains(".jpg")){
suffix=".jpg";
}
if(uri.contains(".png")){
suffix=".png";
}
if(uri.contains(".jpeg")){
suffix=".jpeg";
}
if(uri.contains(".gif")){
suffix=".gif";
}
return suffix;
} }
/** /**
...@@ -271,12 +284,13 @@ public class ContentFileFinder { ...@@ -271,12 +284,13 @@ public class ContentFileFinder {
fileTag.setAbsoluteTag(imgTag.outerHtml()); fileTag.setAbsoluteTag(imgTag.outerHtml());
//图片保存路径 //图片保存路径
suffix = ContentFileFinder.getSuffix(absolutePath); suffix = ContentFileFinder.getSuffix(absolutePath);
fileTag.setSuffix(suffix.substring(1));
// if(StringUtils.isNotBlank(suffix)){
// fileTag.setSuffix(suffix.substring(1)); // fileTag.setSuffix(suffix.substring(1));
// }else{ if(StringUtils.isNotBlank(suffix)){
// fileTag.setSuffix(""); fileTag.setSuffix(suffix.substring(1));
// } }else{
fileTag.setSuffix("");
continue;
}
savePath = genImgFileName(suffix); savePath = genImgFileName(suffix);
fileTag.setSavePath(savePath); fileTag.setSavePath(savePath);
//图片保存标签 //图片保存标签
......
...@@ -514,12 +514,18 @@ public class ImgBaseUtil { ...@@ -514,12 +514,18 @@ public class ImgBaseUtil {
* @param vname 下载文件名 * @param vname 下载文件名
*/ */
public static void sendDownloadInfo(String id,String url,String referer,String path,String vname ){ public static void sendDownloadInfo(String id,String url,String referer,String path,String vname ){
String param="{\"jsonrpc\": \"2.0\", \"id\": \""+id+"\", \"method\": \"aria2.addUri\"," for (int i = 0; i < 3; i++) {
+ "\"params\": [[\""+url+"\"], {\"referer\":\""+referer+"\" , \"dir\": \""+path+"\",\"out\":\""+vname+"\"}]}"; try {
sendPost_body("http://localhost:6800/jsonrpc",param); String param = "{\"jsonrpc\": \"2.0\", \"id\": \"" + id + "\", \"method\": \"aria2.addUri\","
+ "\"params\": [[\"" + url + "\"], {\"referer\":\"" + referer + "\" , \"dir\": \"" + path + "\",\"out\":\"" + vname + "\"}]}";
sendPost_body("http://localhost:6800/jsonrpc", param);
}catch (Exception e){
}
}
} }
public static String sendPost_body(String arl, String s) {
public static String sendPost_body(String arl, String s) {
// 创建url资源 // 创建url资源
OutputStreamWriter out = null; OutputStreamWriter out = null;
URL url; URL url;
......
...@@ -43,8 +43,8 @@ spring: ...@@ -43,8 +43,8 @@ spring:
# 多数据源配置 # 多数据源配置
master: master:
url: jdbc:oracle:thin:@localhost:1521:orcl url: jdbc:oracle:thin:@localhost:1521:orcl
username: cis1 username: cistest
password: cis1 password: cistest
driver-class-name: oracle.jdbc.driver.OracleDriver driver-class-name: oracle.jdbc.driver.OracleDriver
# slave: # slave:
# url: jdbc:oracle:thin:@localhost:1521:orcl # url: jdbc:oracle:thin:@localhost:1521:orcl
...@@ -163,3 +163,6 @@ proxy: ...@@ -163,3 +163,6 @@ proxy:
video: video:
path: /home/ubuntu/video/ path: /home/ubuntu/video/
excelpath: C:\\Users\\WIN10\\Desktop\\测试数据库环境\\22222.xls
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论