提交 4fc6b221 作者: 925993793@qq.com

事件服务功能开发,联调修改

上级 14f24f28
......@@ -193,7 +193,17 @@
<artifactId>dynamic-datasource-spring-boot-starter</artifactId>
<version>3.2.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.freemarker/freemarker -->
<dependency>
<groupId>org.freemarker</groupId>
<artifactId>freemarker</artifactId>
<version>2.3.31</version>
</dependency>
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>19.1</version>
</dependency>
</dependencies>
<build>
......
package com.zzsn.event.config;
import freemarker.template.Configuration;
import java.util.Locale;
/**
 * Holds the single FreeMarker {@link Configuration} shared by the application.
 * <p>
 * The configuration is built once when the class is loaded. Templates are loaded relative to
 * the {@code /template} classpath location and are read as UTF-8.
 */
public class FreeMarkerConfiguration {

    /** Shared instance; created once and never reassigned. */
    private static final Configuration FREEMARKER_CONFIG = createConfiguration();

    /**
     * Returns the shared FreeMarker configuration.
     *
     * @return the application-wide {@link Configuration} instance
     */
    public static Configuration get() {
        return FREEMARKER_CONFIG;
    }

    /** Builds the configuration used for every template lookup. */
    private static Configuration createConfiguration() {
        Configuration configuration = new Configuration(Configuration.VERSION_2_3_22);
        // Default encoding covers lookups made with any locale, not only the JVM default one.
        configuration.setDefaultEncoding("UTF-8");
        configuration.setEncoding(Locale.getDefault(), "UTF-8");
        configuration.setClassForTemplateLoading(FreeMarkerConfiguration.class, "/template");
        return configuration;
    }
}
......@@ -6,6 +6,7 @@ import cn.hutool.core.date.DateUnit;
import cn.hutool.core.date.DateUtil;
import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.zzsn.event.constant.Constants;
import com.zzsn.event.constant.Result;
import com.zzsn.event.entity.LabelEntity;
......@@ -26,9 +27,9 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.time.LocalDate;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
......@@ -68,10 +69,15 @@ public class EventAnalysisController {
* @date 2024/1/24
*/
@GetMapping("/total")
public Result<?> totalAndMax(@RequestParam String subjectId, @RequestParam String startTime, @RequestParam String endTime) {
Map<String, Object> map = esStatisticsService.totalAndMax(subjectId, null, null);
public Result<?> totalAndMax(@RequestParam String subjectId, @RequestParam String startTime,
@RequestParam(required = false) String endTime,
@RequestParam(defaultValue = "1") Integer type) {
Map<String, String> map = esStatisticsService.totalAndMax(subjectId, null, null, type);
if (StringUtils.isEmpty(endTime)) {
endTime = DateUtil.now();
}
long hours = DateUtil.between(DateUtil.parseDateTime(startTime), DateUtil.parseDateTime(endTime), DateUnit.HOUR);
map.put("duration", hours);
map.put("duration", String.valueOf(hours));
Object count = map.get("totalCount");
String divide = CalculateUtil.divide(String.valueOf(count), String.valueOf(hours), 2);
map.put("spread", divide);
......@@ -153,7 +159,7 @@ public class EventAnalysisController {
if (4 == type) {
wrapper.eq(SubjectAnalysis::getType, 4);
} else {
wrapper.ne(SubjectAnalysis::getType, 4);
wrapper.isNull(SubjectAnalysis::getType);
}
List<SubjectAnalysis> list = subjectAnalysisService.list(wrapper);
return Result.OK(list);
......@@ -191,34 +197,44 @@ public class EventAnalysisController {
*/
@GetMapping("/orientation")
public Result<?> orientation(@RequestParam String subjectId, @RequestParam String startTime,
@RequestParam String endTime, @RequestParam Integer type) {
@RequestParam(required = false) String endTime, @RequestParam Integer type) {
List<CountVO> list = new ArrayList<>();
String labelTypeId = "1631119596744265729";
List<LabelEntity> labelEntities = labelEntityService.listByType(labelTypeId);
AtomicLong total = new AtomicLong();
labelEntities.forEach(e -> {
CompletableFuture<CountVO> async = CompletableFuture.supplyAsync(() -> {
CountVO countVO = esStatisticsService.orientation(subjectId, e.getId(), startTime, endTime, type);
total.addAndGet(countVO.getValue());
supply(countVO, startTime, endTime, type);
return countVO;
Map<String, String> map = esStatisticsService.totalAndMax(subjectId, null, null, type);
String totalCount = map.get("totalCount");
if (!totalCount.equals("0")) {
String maxTime = map.get("maxTime");
Map<String, String> timeRangeMap = getTimeRange(startTime, endTime, maxTime, type);
startTime = timeRangeMap.get("startTime");
endTime = timeRangeMap.get("endTime");
String labelTypeId = "1631119596744265729";
List<LabelEntity> labelEntities = labelEntityService.listByType(labelTypeId);
AtomicLong total = new AtomicLong();
String finalStartTime = startTime;
String finalEndTime = endTime;
labelEntities.forEach(e -> {
CompletableFuture<CountVO> async = CompletableFuture.supplyAsync(() -> {
CountVO countVO = esStatisticsService.orientation(subjectId, e.getId(), finalStartTime, finalEndTime, type);
total.addAndGet(countVO.getValue());
supply(countVO, finalStartTime, finalEndTime, type);
return countVO;
});
try {
CountVO countVO = async.get();
list.add(countVO);
} catch (Exception ex) {
ex.printStackTrace();
}
});
try {
CountVO countVO = async.get();
list.add(countVO);
} catch (Exception ex) {
ex.printStackTrace();
}
});
for (CountVO countVO : list) {
long value = countVO.getValue();
long totalCount = total.get();
String divide = CalculateUtil.divide(String.valueOf(value), String.valueOf(totalCount));
String percentage = "0%";
if (StringUtils.isNotEmpty(divide)) {
percentage = CalculateUtil.percentage(Double.parseDouble(divide), false);
for (CountVO countVO : list) {
long value = countVO.getValue();
long totalNum = total.get();
String divide = CalculateUtil.divide(String.valueOf(value), String.valueOf(totalNum));
String percentage = "0%";
if (StringUtils.isNotEmpty(divide)) {
percentage = CalculateUtil.percentage(Double.parseDouble(divide), false);
}
countVO.setPercentage(percentage);
}
countVO.setPercentage(percentage);
}
return Result.OK(list);
}
......@@ -235,20 +251,19 @@ public class EventAnalysisController {
*/
@GetMapping("/flowData")
public Result<?> flowData(@RequestParam String subjectId, @RequestParam String startTime,
@RequestParam String endTime, @RequestParam Integer type) {
AtomicLong total = new AtomicLong();
CountVO countVO = esStatisticsService.flowData(subjectId, startTime, endTime);
total.addAndGet(countVO.getValue());
supply(countVO, startTime, endTime, type);
long value = countVO.getValue();
long totalCount = total.get();
String divide = CalculateUtil.divide(String.valueOf(value), String.valueOf(totalCount));
String percentage = "0%";
if (StringUtils.isNotEmpty(divide)) {
percentage = CalculateUtil.percentage(Double.parseDouble(divide), false);
@RequestParam(required = false) String endTime, @RequestParam Integer type) {
Map<String, String> map = esStatisticsService.totalAndMax(subjectId, null, null, type);
String totalCount = map.get("totalCount");
List<CountVO> list = new ArrayList<>();
if (!totalCount.equals("0")) {
String maxTime = map.get("maxTime");
Map<String, String> timeRangeMap = getTimeRange(startTime, endTime, maxTime, type);
startTime = timeRangeMap.get("startTime");
endTime = timeRangeMap.get("endTime");
List<CountVO> dataList = esStatisticsService.flowData(subjectId, startTime, endTime, type);
list = supplyChildren(dataList, startTime, endTime, type);
}
countVO.setPercentage(percentage);
return Result.OK(countVO);
return Result.OK(list);
}
......@@ -265,11 +280,55 @@ public class EventAnalysisController {
return Result.OK(list);
}
/**
 * Works out the time window actually used for the trend charts.
 * <p>
 * An empty {@code endTime} defaults to the current time. For hourly charts ({@code type == 1})
 * the window is the whole day containing the peak. For daily charts ({@code type == 2}) that
 * span more than 14 days, the window is narrowed to at most 7 days either side of the peak day
 * without ever reaching outside the original start/end. In every other case the range is
 * returned unchanged.
 *
 * @param startTime subject start time
 * @param endTime   subject end time; may be empty
 * @param maxTime   time at which the peak occurred
 * @param type      1 - by hour; 2 - by day
 * @return a map holding the resulting range under the keys {@code "startTime"} and {@code "endTime"}
 * @author lkg
 * @date 2024/4/11
 */
private Map<String, String> getTimeRange(String startTime, String endTime, String maxTime, Integer type) {
    String rangeStart = startTime;
    String rangeEnd = StringUtils.isEmpty(endTime) ? DateUtil.now() : endTime;
    if (type == 1) {
        // Hourly view: show the full day on which the peak hour falls.
        DateTime peakHour = DateUtil.parse(maxTime, "yyyy-MM-dd HH");
        rangeStart = DateUtil.formatDateTime(DateUtil.beginOfDay(peakHour));
        rangeEnd = DateUtil.formatDateTime(DateUtil.endOfDay(peakHour));
    } else if (type == 2) {
        DateTime originalStart = DateUtil.parseDateTime(rangeStart);
        DateTime originalEnd = DateUtil.parseDateTime(rangeEnd);
        if (DateUtil.betweenDay(originalStart, originalEnd, true) > 14) {
            // Daily view over a long period: keep a window of +/- 7 days around the peak day.
            DateTime peakDay = DateUtil.parseDate(maxTime);
            DateTime windowStart = DateUtil.offsetDay(peakDay, -7);
            if (windowStart.compareTo(originalStart) > 0) {
                rangeStart = DateUtil.formatDateTime(windowStart);
            }
            DateTime windowEnd = DateUtil.offsetDay(peakDay, 7);
            if (windowEnd.compareTo(originalEnd) < 0) {
                rangeEnd = DateUtil.formatDateTime(windowEnd);
            }
        }
    }
    Map<String, String> range = new HashMap<>();
    range.put("startTime", rangeStart);
    range.put("endTime", rangeEnd);
    return range;
}
// Fills in the missing time entries of the given node's children (delegates to supplyChildren)
// and stores the completed list back on the node.
private void supply(CountVO countVO, String startTime, String endTime, Integer type) {
    List<CountVO> children = countVO.getChildren();
    List<CountVO> list = supplyChildren(children, startTime, endTime, type);
    countVO.setChildren(list);
}
//补充缺失的时间
private List<CountVO> supplyChildren(List<CountVO> children, String startTime, String endTime, Integer type) {
List<CountVO> list = new ArrayList<>();
Map<String, CountVO> map = children.stream().collect(Collectors.toMap(CountVO::getName, item -> item, (k1, k2) -> k2));
DateTime startDate = DateUtil.parseDateTime(startTime);
DateTime endDate = DateUtil.parseDateTime(endTime);
......@@ -285,7 +344,7 @@ public class EventAnalysisController {
for (DateTime dateTime : rangeToList) {
String date = DateUtil.format(dateTime, format);
if (map.containsKey(date)) {
list.add(countVO);
list.add(map.get(date));
} else {
CountVO vo = new CountVO();
vo.setName(date);
......@@ -293,6 +352,6 @@ public class EventAnalysisController {
list.add(vo);
}
}
countVO.setChildren(list);
return list;
}
}
package com.zzsn.event.controller;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.zzsn.event.constant.Constants;
import com.zzsn.event.constant.Result;
import com.zzsn.event.service.EsService;
import com.zzsn.event.service.IEventService;
......@@ -63,7 +64,7 @@ public class EventDataController {
@RequestParam(name = "orderType", required = false) String orderType,
@RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
@RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize) {
IPage<EventFrontVO> pageList = eventService.frontPageList(eventName, eventType, labelField,labelName,order,orderType,pageNo, pageSize);
IPage<EventFrontVO> pageList = eventService.frontPageList(eventName, eventType, labelField, labelName, order, orderType, pageNo, pageSize);
return Result.OK(pageList);
}
......@@ -91,7 +92,7 @@ public class EventDataController {
@RequestParam(name = "column", defaultValue = "publishDate") String column,
@RequestParam(name = "order", defaultValue = "desc") String order,
@RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
@RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize) throws Exception {
@RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize) {
List<String> subjectIdList = new ArrayList<>();
if (StringUtils.isNotEmpty(subjectId)) {
subjectIdList.add(subjectId);
......@@ -111,7 +112,10 @@ public class EventDataController {
* @return
*/
@GetMapping(value = "/articleDetail")
public Result<?> articleDetail(@RequestParam String index, @RequestParam String id) {
public Result<?> articleDetail(@RequestParam(required = false) String index, @RequestParam String id) {
if (StringUtils.isEmpty(index)) {
index = Constants.SUBJECT_INDEX;
}
SubjectDataVo subjectDataVo = esService.queryInfo(index, id);
return Result.OK(subjectDataVo);
}
......@@ -127,8 +131,11 @@ public class EventDataController {
*/
@ApiOperation(value = "单篇文章热词", notes = "单篇文章热词")
@GetMapping(value = "/hotWords")
public Result<?> articleList(@RequestParam("index") String index, @RequestParam("id") String id,
public Result<?> articleList(@RequestParam(value = "index",required = false) String index, @RequestParam("id") String id,
@RequestParam(name = "number", defaultValue = "200") Integer number) {
if (StringUtils.isEmpty(index)) {
index = Constants.SUBJECT_INDEX;
}
List<StatisticsKeyWordVo> words = eventService.hotWords(index, id, number);
return Result.OK(words);
}
......@@ -136,18 +143,20 @@ public class EventDataController {
/**
* 相关推荐
*
* @param id 资讯id
* @param title 标题
* @param pageSize 返回条数
* @param subjectId 专题id
* @param id 资讯id
* @param title 标题
* @param pageSize 返回条数
* @author lkg
* @date 2024/4/10
*/
@GetMapping(value = "/recommendList")
public Result<?> recommendList(@RequestParam(name = "id") String id,
public Result<?> recommendList(@RequestParam(name = "subjectId") String subjectId,
@RequestParam(name = "id") String id,
@RequestParam(name = "title") String title,
@RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
@RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize) throws Exception {
List<SubjectDataVo> recommendList = esService.queryRecommendList(id, title, pageNo, pageSize);
@RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize) {
List<SubjectDataVo> recommendList = esService.queryRecommendList(subjectId, id, title, pageNo, pageSize);
return Result.OK(recommendList);
}
......
package com.zzsn.event.controller;
import com.aspose.words.Document;
import com.aspose.words.SaveFormat;
import com.zzsn.event.config.FreeMarkerConfiguration;
import com.zzsn.event.service.EsService;
import com.zzsn.event.service.IEventService;
import com.zzsn.event.util.DateUtil;
import com.zzsn.event.util.ExcelExportUtil;
import com.zzsn.event.util.Utility;
import com.zzsn.event.vo.EventExcelVO;
import com.zzsn.event.vo.ExportParam;
import com.zzsn.event.vo.SubjectDataVo;
import freemarker.template.Template;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.io.*;
import java.net.URLEncoder;
import java.util.*;
/**
* 导出功能
......@@ -29,21 +39,20 @@ public class EventExportController {
@Autowired
private IEventService eventService;
@Autowired
private EsService esService;
/**
* 导出事件列表
*
* @param eventIdList 事件id集合
* @param size 导出数量
* @param exportParam 导出参数封装
* @author lkg
* @date 2024/4/10
*/
@GetMapping("/eventList")
public void exportEventList(@RequestParam(required = false) List<String> eventIdList,
@RequestParam(required = false) Integer size,
HttpServletResponse response) {
@PostMapping("/eventList")
public void exportEventList(@RequestBody ExportParam exportParam, HttpServletResponse response) {
String[] headers = new String[]{"id", "事件名称", "事件描述", "发布时间", "热度"};
List<EventExcelVO> eventList = eventService.frontList(eventIdList, size);
List<EventExcelVO> eventList = eventService.frontList(exportParam.getEventIdList(), exportParam.getSize());
if (CollectionUtils.isNotEmpty(eventList)) {
String name = "event.xlsx";
List<List<String>> dataList = new ArrayList<>();
......@@ -61,6 +70,126 @@ public class EventExportController {
}
}
/**
 * Exports the article list of the selected events as a Word document.
 * <p>
 * When no event ids are supplied, the ids returned by {@code eventService.frontList(null, null)}
 * are used instead. The articles are rendered through the {@code EVENT_DATA_REPORT.ftl} template,
 * converted to DOCX and written to the response as an attachment.
 *
 * @param exportParam export parameters
 * @param response    HTTP response that receives the document
 * @author lkg
 * @date 2024/4/11
 */
@PostMapping("/dataList")
public void exportDataList(@RequestBody ExportParam exportParam, HttpServletResponse response) {
    try {
        // Work on a private copy: the list from the request may be null or unmodifiable.
        List<String> eventIdList = new ArrayList<>();
        if (CollectionUtils.isNotEmpty(exportParam.getEventIdList())) {
            eventIdList.addAll(exportParam.getEventIdList());
        } else {
            List<EventExcelVO> frontList = eventService.frontList(null, null);
            if (CollectionUtils.isNotEmpty(frontList)) {
                frontList.forEach(e -> eventIdList.add(e.getId()));
            }
        }
        List<SubjectDataVo> exportDataList = esService.exportDataList(eventIdList, exportParam.getSearchWord(), exportParam.getPosition(), exportParam.getCategory(),
                exportParam.getArticleIdList(), exportParam.getColumn(), exportParam.getOrder(), exportParam.getType(), exportParam.getSize());
        Map<String, Object> map = formatDocData(exportDataList, exportParam.getSearchWord(), exportParam.getType());
        Template template = FreeMarkerConfiguration.get().getTemplate("EVENT_DATA_REPORT.ftl", "UTF-8");
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        // Render with an explicit charset so the output does not depend on the platform default.
        Writer writer = new OutputStreamWriter(bos, "UTF-8");
        template.process(map, writer);
        writer.flush();
        Document document = new Document(new ByteArrayInputStream(bos.toByteArray()));
        // Reuse the buffer for the converted document.
        bos.reset();
        document.updateFields();
        document.save(bos, SaveFormat.DOCX);
        // NOTE(review): the file name carries no extension and the content type is the legacy
        // .doc one although the payload is DOCX — confirm what the front end expects.
        String fileName = URLEncoder.encode("事件资讯", "UTF-8").replace("+", "-");
        response.setHeader("content-Type", "application/msword");
        response.setHeader("Content-Disposition", "attachment;filename=" + fileName);
        response.setContentLength(bos.size());
        try (OutputStream outputstream = response.getOutputStream()) {
            bos.writeTo(outputstream);
            outputstream.flush();
        }
    } catch (Exception e) {
        e.printStackTrace();
        // Do not report success with an empty body when the export failed before any output.
        if (!response.isCommitted()) {
            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        }
    }
}
/**
 * Builds the data model consumed by the ftl template.
 * <p>
 * The returned map contains {@code docTitle}, {@code publishDate} (today), {@code keyWords},
 * {@code catalogMap} (table of contents) and {@code contentMap} (article bodies); both inner
 * maps are keyed by the document title.
 *
 * @param exportDataList articles to export; {@code null} is treated as empty
 * @param searchWord     search word; rendered as an empty string when absent
 * @param type           export mode (1 - summary; 2 - full text); any value other than 1,
 *                       including {@code null}, exports the full text
 * @return the template data model
 * @author lkg
 * @date 2024/4/11
 */
private Map<String, Object> formatDocData(List<SubjectDataVo> exportDataList, String searchWord, Integer type) {
    String docTitle = "事件资讯";
    // Null-safe check: an absent type must not fail on unboxing.
    boolean summaryOnly = type != null && type == 1;
    // Article entries
    List<Map<String, Object>> contents = new ArrayList<>();
    // Table-of-contents entries
    List<Map<String, Object>> caList = new ArrayList<>();
    int num = 1;
    if (exportDataList != null) {
        for (SubjectDataVo subjectDataVo : exportDataList) {
            String rid = subjectDataVo.getId();
            String title = Utility.getValueAfterReplaceSpecialWordNotEnter(subjectDataVo.getTitle());
            String sourceAddress = subjectDataVo.getSourceAddress();
            String origin = subjectDataVo.getOrigin();
            String summary = subjectDataVo.getSummary();
            if (StringUtils.isNotEmpty(summary)) {
                summary = Utility.getValueAfterReplaceSpecialWordNotEnter(Utility.TransferHTML2Text(summary));
            }
            String publishDate = subjectDataVo.getPublishDate();
            String contentStr = subjectDataVo.getContent();
            if (StringUtils.isNotEmpty(contentStr)) {
                contentStr = Utility.getValueAfterReplaceSpecialWordNotEnter(Utility.TransferHTML2Text(contentStr));
            }
            String info = StringUtils.isNotEmpty(origin) ? "(来源:" + origin + ",发布时间:" + publishDate + ")" : "(发布时间:" + publishDate + ")";
            Map<String, Object> content = new HashMap<>();
            content.put("id", rid);
            content.put("title", title);
            content.put("url", Utility.getValueAfterReplaceSpecialWordNotEnter(sourceAddress));
            if (summaryOnly) {
                // A missing summary must not be rendered as the literal text "null".
                content.put("info", (summary == null ? "" : summary) + info);
            } else {
                content.put("info", info);
                content.put("contentStr", contentStr);
            }
            contents.add(content);
            Map<String, Object> catalog = new HashMap<>();
            catalog.put("index", num);
            catalog.put("title", title);
            catalog.put("id", rid + "-" + (num - 1));
            caList.add(catalog);
            num++;
        }
    }
    // Document outline: only present when at least one article was exported.
    Map<String, List<Map<String, Object>>> contentSortMap = new LinkedHashMap<>();
    if (!contents.isEmpty()) {
        contentSortMap.put(docTitle, contents);
    }
    // Table of contents: always present, possibly with an empty list.
    Map<String, List<Map<String, Object>>> catalogSortMap = new LinkedHashMap<>();
    catalogSortMap.put(docTitle, caList);
    Map<String, Object> map = new HashMap<>();
    map.put("docTitle", docTitle);
    map.put("publishDate", DateUtil.dateToString(new Date(), "yyyy-MM-dd"));
    map.put("keyWords", StringUtils.isEmpty(searchWord) ? "" : searchWord);
    map.put("catalogMap", catalogSortMap);
    map.put("contentMap", contentSortMap);
    return map;
}
private void setResponseHeader(HttpServletResponse response, String name) {
try {
try {
......
......@@ -79,11 +79,38 @@ public class EventManageController {
@RequestParam(name = "orderType", defaultValue = "asc") String orderType,
@RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
@RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize) {
IPage<EventManageVO> pageList = eventService.pageList(eventName,eventType,startTime,endTime, order, orderType, pageNo, pageSize);
IPage<EventManageVO> pageList = eventService.pageList(eventName, eventType, startTime, endTime, order, orderType, pageNo, pageSize);
return Result.OK(pageList);
}
/**
 * Region information as a tree structure.
 *
 * @param type category (1 - international; 2 - domestic)
 * @return the region nodes wrapped in a successful {@code Result}
 * @author lkg
 * @date 2024/4/10
 */
@GetMapping("/regionTree")
public Result<?> regionTree(@RequestParam Integer type) {
    return Result.OK(labelEntityService.regionTree(type));
}
/**
 * 2.17 Upload an icon.
 *
 * @param request the incoming request; it is cast to a multipart request
 * @return the value returned by {@code eventService.upload}, wrapped in a successful {@code Result}
 */
@PostMapping(value = "/upload")
@ResponseBody
public Result<?> uploadKnowledge(HttpServletRequest request) {
    // The uploaded file is read from the multipart field named "file".
    MultipartFile uploaded = ((MultipartHttpServletRequest) request).getFile("file");
    return Result.OK(eventService.upload(uploaded));
}
/**
* 1.2 添加
*
* @param eventParam
......@@ -167,7 +194,7 @@ public class EventManageController {
.last(" limit 1"));
event.setEventTag(one);
AddEventParam eventParam = new AddEventParam();
BeanUtils.copyProperties(event,eventParam);
BeanUtils.copyProperties(event, eventParam);
List<RegionVO> regionList = eventRegionMapService.regionList(event.getId());
eventParam.setRegionList(regionList);
return Result.OK(eventParam);
......@@ -406,7 +433,6 @@ public class EventManageController {
public Object deleteKeyWordsBind(@RequestBody SubjectPage subjectPage) {
try {
JSONObject params = ObjectUtil.objectToJSONObject(subjectPage);
;
String url = SERVICE_PROJECT_URL + "event/deleteKeyWordsBind";
return HttpUtil.doPost(url, params, 10000);
} catch (Exception e) {
......@@ -445,29 +471,49 @@ public class EventManageController {
}
/**
* 地域信息-树型结构
* 模型信息列表
*
* @param type 类别(1-国际;2-国内)
* @author lkg
* @date 2024/4/10
* @date 2024/4/11
*/
@GetMapping("/regionTree")
public Result<?> regionTree(@RequestParam Integer type) {
List<Node> nodes = labelEntityService.regionTree(type);
return Result.OK(nodes);
@GetMapping("/modelList")
public Result<?> modelList() {
List<ModelVO> modelVOS = eventService.modelList();
return Result.OK(modelVOS);
}
/**
* 2.17 上传icon
* 算法模型信息列表
*
* @return
* @param subjectId 专题id
* @param type 类型id
* @author lkg
* @date 2024/4/11
*/
@PostMapping(value = "/upload")
@ResponseBody
public Result<?> uploadKnowledge(HttpServletRequest request) {
MultipartHttpServletRequest multipartRequest = (MultipartHttpServletRequest) request;
MultipartFile file = multipartRequest.getFile("file");// 获取上传文件对象
String url = eventService.upload(file);
return Result.OK(url);
@GetMapping("/algorithmModelList")
public Object algorithmModelList(@RequestParam String subjectId, @RequestParam Integer type) {
String url = SERVICE_PROJECT_URL + "event/listNoPage";
Map<String, String> params = new HashMap<>();
params.put("subjectId", subjectId);
params.put("type", type.toString());
return HttpUtil.doGet(url, params, "utf-8");
}
/**
 * Model binding.
 * <p>
 * Serialises the request body and posts it to {@code event/modelBind} under
 * {@code SERVICE_PROJECT_URL}.
 *
 * @param subjectPage binding request payload
 * @return the result of the forwarded POST, or {@code null} when anything goes wrong
 * @author lkg
 * @date 2024/4/11
 */
@PostMapping("/modelBind")
public Object modelBind(@RequestBody SubjectPage subjectPage) {
    final String targetUrl = SERVICE_PROJECT_URL + "event/modelBind";
    try {
        JSONObject payload = ObjectUtil.objectToJSONObject(subjectPage);
        return HttpUtil.doPost(targetUrl, payload, 10000);
    } catch (Exception e) {
        // NOTE(review): the failure is dropped without any trace; callers only see null.
        return null;
    }
}
}
......@@ -38,7 +38,7 @@ public class SubjectAnalysis implements Serializable {
private String sourceAddress;
/*重复数*/
private Integer repeatNum;
/*观点分析下的类型(1-新闻;2-论坛;3-微博)*/
/*观点分析下的类型(1-新闻;2-论坛;3-微博;4-专家)*/
private Integer type;
/*分类(1-观点分析;2-事件脉络;3-伪事件脉络)*/
//伪事件脉络 即当事件脉络资讯数量少于约定数量,通过python算法生成临时的资讯数量大于/等于约定数量的事件脉络。
......@@ -46,5 +46,6 @@ public class SubjectAnalysis implements Serializable {
/*分析时间*/
@DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
private Date analysisDate;
/**专家名称*/
private String professionName;
}
......@@ -123,4 +123,12 @@ public interface EventMapper extends BaseMapper<Event> {
* @date 2024/4/10
*/
List<EventExcelVO> frontList(@Param("eventIdList") List<String> eventIdList, @Param("size") Integer size);
/**
 * Model information list.
 *
 * @return the models as {@link ModelVO} entries
 * @author lkg
 * @date 2024/4/11
 */
List<ModelVO> modelList();
}
......@@ -125,7 +125,8 @@
</select>
<select id="topEventList" resultType="com.zzsn.event.vo.EventTopVO">
select t.id,t.event_name,t.publish_date,t.total_hot,ec.type_name from event t
select t.id,t.event_name,t.start_time,t.end_time,t.publish_date,t.total_hot,ec.type_name
from event t
inner join event_category ec on t.event_type = ec.id
where t.publish_status = 1 and t.face_public = 1
<if test="startTime!=null and startTime != ''">
......@@ -162,9 +163,11 @@
SELECT
e.id AS eventId,
e.event_name,
e.start_time,
e.end_time,
r.name_cn AS regionName
FROM
( SELECT id, event_name FROM event WHERE publish_status = 1 AND face_public = 1 ) e
( SELECT id, event_name,start_time,end_time FROM event WHERE publish_status = 1 AND face_public = 1 ) e
INNER JOIN event_region_map m ON e.id = m.event_id
INNER JOIN sys_base_region r ON m.top_region_id = r.id
WHERE m.type = #{type}
......@@ -174,8 +177,8 @@
</select>
<select id="frontList" resultType="com.zzsn.event.vo.EventExcelVO">
select id,event_name,event_describe,publish_date,total_hot from event where publish_status = 1 and face_public =
1
select id,event_name,event_describe,publish_date,total_hot from event
where publish_status = 1 and face_public = 1
<if test="eventIdList != null and eventIdList.size() > 0">
and id in
<foreach collection="eventIdList" open="(" separator="," close=")" item="item">
......@@ -187,4 +190,8 @@
limit #{size}
</if>
</select>
<!-- Model list: id, model_name and type of the rows in `model` whose pid is '0' and whose type is set. -->
<select id="modelList" resultType="com.zzsn.event.vo.ModelVO">
select id,model_name,type from model where pid = '0' and type is not null
</select>
</mapper>
\ No newline at end of file
package com.zzsn.event.service;
import cn.hutool.core.map.MapUtil;
import cn.hutool.json.JSONUtil;
import com.alibaba.fastjson2.JSON;
import com.baomidou.mybatisplus.core.metadata.IPage;
......@@ -32,7 +33,6 @@ import org.elasticsearch.search.sort.SortOrder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
......@@ -113,7 +113,7 @@ public class EsService {
* @param startDate 开始时间
* @param endDate 结束时间
*/
public Integer count(String subjectId, String startDate, String endDate) {
public int count(String subjectId, String startDate, String endDate) {
long count = 0L;
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//创建查询对象
......@@ -190,7 +190,8 @@ public class EsService {
* @author lkg
* @date 2024/4/10
*/
public IPage<SubjectDataVo> frontListByPage(List<String> subjectIdList, String searchWord, String position, Integer category, String column, String order, int pageNo, int pageSize) throws Exception {
public IPage<SubjectDataVo> frontListByPage(List<String> subjectIdList, String searchWord, String position, Integer category,
String column, String order, int pageNo, int pageSize) {
SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//设置分页参数
......@@ -219,7 +220,7 @@ public class EsService {
searchSourceBuilder.trackTotalHits(true);
//创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
boolQuery.must(QueryBuilders.termsQuery("subjectId", subjectIdList));
boolQuery.must(QueryBuilders.termsQuery("subjectId.keyword", subjectIdList));
if (StringUtils.isNotEmpty(searchWord)) {
if (category == 1) {
boolQuery.must(QueryBuilders.matchQuery(position, searchWord));
......@@ -230,19 +231,24 @@ public class EsService {
boolQuery.mustNot(QueryBuilders.matchQuery("deleteFlag", "1"));
searchSourceBuilder.query(boolQuery);
searchRequest.source(searchSourceBuilder);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] searchHits = searchResponse.getHits().getHits();
List<SubjectDataVo> list = new ArrayList<>();
for (SearchHit hit : searchHits) {
String queryInfo = hit.getSourceAsString();
SubjectDataVo info = JSONUtil.toBean(queryInfo, SubjectDataVo.class);
info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate()));
String index = hit.getIndex();
info.setIndex(index);
list.add(info);
IPage<SubjectDataVo> pageData = new Page<>();
try {
List<SubjectDataVo> list = new ArrayList<>();
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] searchHits = searchResponse.getHits().getHits();
for (SearchHit hit : searchHits) {
String queryInfo = hit.getSourceAsString();
SubjectDataVo info = JSONUtil.toBean(queryInfo, SubjectDataVo.class);
info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate()));
String index = hit.getIndex();
info.setIndex(index);
list.add(info);
}
pageData = new Page<>(pageNo, pageSize, searchResponse.getHits().getTotalHits().value);
pageData.setRecords(list);
} catch (Exception e) {
e.printStackTrace();
}
IPage<SubjectDataVo> pageData = new Page<>(pageNo, pageSize, searchResponse.getHits().getTotalHits().value);
pageData.setRecords(list);
return pageData;
}
......@@ -293,7 +299,7 @@ public class EsService {
* @author lkg
* @date 2024/4/10
*/
public List<SubjectDataVo> queryRecommendList(String id, String title, Integer pageNo, Integer pageSize) throws IOException {
public List<SubjectDataVo> queryRecommendList(String subjectId,String id, String title, Integer pageNo, Integer pageSize) {
SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//设置分页参数
......@@ -308,21 +314,27 @@ public class EsService {
searchSourceBuilder.fetchSource(fetchFields, null);
//创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
boolQuery.must(QueryBuilders.termQuery("subjectId.keyword", subjectId));
boolQuery.must(QueryBuilders.matchQuery("title", title));
boolQuery.mustNot(QueryBuilders.termQuery("id", id));
searchSourceBuilder.query(boolQuery);
searchRequest.source(searchSourceBuilder);
searchSourceBuilder.collapse(new CollapseBuilder("sourceAddress.keyword"));
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] searchHits = searchResponse.getHits().getHits();
searchSourceBuilder.collapse(new CollapseBuilder("title.keyword"));
List<SubjectDataVo> list = new ArrayList<>();
for (SearchHit hit : searchHits) {
String index = hit.getIndex();
String queryInfo = hit.getSourceAsString();
SubjectDataVo info = com.alibaba.fastjson.JSON.parseObject(queryInfo, SubjectDataVo.class);
info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate()));
info.setIndex(index);
list.add(info);
try {
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] searchHits = searchResponse.getHits().getHits();
for (SearchHit hit : searchHits) {
String index = hit.getIndex();
String queryInfo = hit.getSourceAsString();
SubjectDataVo info = com.alibaba.fastjson.JSON.parseObject(queryInfo, SubjectDataVo.class);
info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate()));
info.setIndex(index);
list.add(info);
}
} catch (Exception e) {
e.printStackTrace();
}
return list;
}
......@@ -339,74 +351,200 @@ public class EsService {
public Map<String, Integer> getSimilarNumber(String subjectId, List<String> articleIdList) {
    Map<String, Integer> map = new HashMap<>();
    // Repeat marks of the requested articles; the map values are used as the result keys below.
    Map<String, String> markMap = getMark(subjectId, articleIdList);
    if (!MapUtil.isNotEmpty(markMap)) {
        // No marks found: nothing to count.
        return map;
    }
    List<String> markList = new ArrayList<>(markMap.keySet());
    SearchRequest searchRequest = new SearchRequest(Constants.ES_REPEAT_OLD);
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Build the query
    BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
    boolQuery.must(QueryBuilders.termsQuery("repeatMark", markList));
    // Only the aggregation is needed, not the hits themselves.
    searchSourceBuilder.size(0);
    searchSourceBuilder.trackTotalHits(true);
    TermsAggregationBuilder aggregationBuilder = AggregationBuilders.terms("group_mark")
            .field("repeatMark")
            .order(BucketOrder.count(false))
            .size(articleIdList.size());
    searchSourceBuilder.aggregation(aggregationBuilder);
    searchSourceBuilder.query(boolQuery);
    searchRequest.source(searchSourceBuilder);
    try {
        SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
        Aggregations aggregations = response.getAggregations();
        Terms groupSource = aggregations.get("group_mark");
        List<? extends Terms.Bucket> buckets = groupSource.getBuckets();
        if (CollectionUtils.isNotEmpty(buckets)) {
            for (Terms.Bucket bucket : buckets) {
                // Translate each mark back through markMap and record its document count.
                map.put(markMap.get(bucket.getKeyAsString()), (int) bucket.getDocCount());
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return map;
}
/**
 * Looks up the duplicate cluster each article belongs to (a cluster stores the
 * information about duplicated documents).
 *
 * @param subjectId     subject id
 * @param articleIdList article ids; every occurrence of {@code subjectId} is removed
 *                      from each id before it is matched against {@code articleId}
 * @return map from {@code repeatMark} to {@code subjectId + articleId} of the stored
 *         record; empty when the id list is empty or the search failed
 * @author lkg
 * @date 2024/4/10
 */
private Map<String, String> getMark(String subjectId, List<String> articleIdList) {
    Map<String, String> map = new HashMap<>();
    if (CollectionUtils.isNotEmpty(articleIdList)) {
        List<String> idList = new ArrayList<>();
        for (String articleId : articleIdList) {
            idList.add(articleId.replace(subjectId, ""));
        }
        SearchRequest searchRequest = new SearchRequest(Constants.ES_REPEAT_OLD);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Without an explicit size the search returns only the default 10 hits,
        // which would silently drop marks for larger id batches.
        searchSourceBuilder.size(idList.size());
        // Build the query object
        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
        boolQuery.must(QueryBuilders.termsQuery("articleId", idList));
        boolQuery.must(QueryBuilders.termQuery("subjectId", subjectId));
        searchSourceBuilder.query(boolQuery);
        searchRequest.source(searchSourceBuilder);
        try {
            SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
            SearchHit[] hits = response.getHits().getHits();
            if (hits != null && hits.length != 0) {
                for (SearchHit hit : hits) {
                    String queryInfo = hit.getSourceAsString();
                    RepeatHold info = JSONUtil.toBean(queryInfo, RepeatHold.class);
                    map.put(info.getRepeatMark(), info.getSubjectId() + info.getArticleId());
                }
            }
        } catch (Exception e) {
            // Best effort: a failed lookup is reported and yields an empty result.
            e.printStackTrace();
        }
    }
    return map;
}
/**
* 按标题匹配数据并按来源分组
*
* @param title 标题
* @param publishDate 发布时间
* @author lkg
* @date 2024/4/11
*/
public List<String> groupByOrigin(String title, String publishDate) {
List<String> originList = new ArrayList<>();
SearchRequest searchRequest = new SearchRequest(Constants.COLLECT_INDEX);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//只返回分组聚合结果,不返回具体数据
searchSourceBuilder.size(0);
//创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
boolQuery.must(QueryBuilders.termsQuery("repeatMark", markList));
searchSourceBuilder.size(0);
searchSourceBuilder.trackTotalHits(true);
TermsAggregationBuilder aggregationBuilder = AggregationBuilders.terms("group_mark")
.field("repeatMark")
.order(BucketOrder.count(false))
.size(articleIdList.size());
searchSourceBuilder.aggregation(aggregationBuilder);
String[] arr = new String[]{"title"};
boolQuery.must(QueryBuilders.multiMatchQuery(title, arr));
boolQuery.filter(QueryBuilders.rangeQuery("publishDate").gt(EsDateUtil.esFieldDateFormat(publishDate)));
TermsAggregationBuilder aggregationBuilder = AggregationBuilders.terms("group_origin")
.field("origin.keyword")
.size(20)
.order(BucketOrder.count(false));
searchSourceBuilder.query(boolQuery);
searchSourceBuilder.aggregation(aggregationBuilder);
searchRequest.source(searchSourceBuilder);
try {
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
Aggregations aggregations = response.getAggregations();
Terms groupSource = aggregations.get("group_mark");
List<? extends Terms.Bucket> buckets = groupSource.getBuckets();
if (CollectionUtils.isNotEmpty(buckets)) {
for (Terms.Bucket bucket : buckets) {
map.put(markMap.get(bucket.getKeyAsString()), (int) bucket.getDocCount());
}
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
Aggregations aggregations = searchResponse.getAggregations();
Terms groupOrigin = aggregations.get("group_origin");
List<? extends Terms.Bucket> buckets = groupOrigin.getBuckets();
for (Terms.Bucket bucket : buckets) {
String origin = bucket.getKeyAsString();
originList.add(origin);
}
} catch (Exception e) {
e.printStackTrace();
}
return map;
return originList;
}
/**
* 获取资讯所在蔟
* 报告导出的资讯信息列表
*
* @param subjectId 专题id
* @param subjectIdList 专题id集合
* @param searchWord 搜索词
* @param position 搜索位置(title-标题;content-内容)
* @param category 匹配度(1-模糊;2-精确)
* @param articleIdList 资讯id集合
* @param column 排序字段
* @param order 排序方式(asc-正序;desc-倒序)
* @param type 导出方式(1-摘要;2-正文)
* @param pageSize 返回条数
* @author lkg
* @date 2024/4/10
* @date 2024/4/11
*/
private Map<String, String> getMark(String subjectId, List<String> articleIdList) {
Map<String, String> map = new HashMap<>();
List<String> idList = new ArrayList<>();
for (String articleId : articleIdList) {
idList.add(articleId.replace(subjectId, ""));
}
SearchRequest searchRequest = new SearchRequest(Constants.ES_REPEAT_OLD);
public List<SubjectDataVo> exportDataList(List<String> subjectIdList, String searchWord, String position, Integer category,
List<String> articleIdList, String column, String order,Integer type, Integer pageSize) {
SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
if (CollectionUtils.isNotEmpty(articleIdList)) {
pageSize = articleIdList.size();
}
//设置分页参数
searchSourceBuilder.size(pageSize);
if (column.equals("score")) {
if (order.equals("asc")) {
searchSourceBuilder.sort(SortBuilders.scoreSort().order(SortOrder.ASC));
searchSourceBuilder.sort("publishDate", SortOrder.ASC);
} else if (order.equals("desc")) {
searchSourceBuilder.sort(SortBuilders.scoreSort().order(SortOrder.DESC));
searchSourceBuilder.sort("publishDate", SortOrder.DESC);
}
} else if (column.equals("publishDate")) {
if (order.equals("desc")) {
searchSourceBuilder.sort("publishDate", SortOrder.DESC);
searchSourceBuilder.sort(SortBuilders.scoreSort().order(SortOrder.DESC));
} else if (order.equals("asc")) {
searchSourceBuilder.sort("publishDate", SortOrder.ASC);
searchSourceBuilder.sort(SortBuilders.scoreSort().order(SortOrder.ASC));
}
}
String[] fetchFields = new String[]{"id", "title", "summary", "origin", "publishDate", "sourceAddress"};
if (type == 2) {
fetchFields = new String[]{"id", "title", "summary", "content", "origin", "publishDate", "sourceAddress"};
}
searchSourceBuilder.fetchSource(fetchFields, null);
//默认最大数量是10000,设置为true后,显示准确数量
searchSourceBuilder.trackTotalHits(true);
//创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
boolQuery.must(QueryBuilders.termsQuery("articleId", idList));
boolQuery.must(QueryBuilders.termQuery("subjectId", subjectId));
boolQuery.must(QueryBuilders.termsQuery("subjectId.keyword", subjectIdList));
if (CollectionUtils.isNotEmpty(articleIdList)) {
boolQuery.must(QueryBuilders.termsQuery("id", articleIdList));
}
if (StringUtils.isNotEmpty(searchWord)) {
if (category == 1) {
boolQuery.must(QueryBuilders.matchQuery(position, searchWord));
} else if (category == 2) {
boolQuery.must(QueryBuilders.matchPhraseQuery(position, searchWord));
}
}
boolQuery.mustNot(QueryBuilders.matchQuery("deleteFlag", "1"));
searchSourceBuilder.query(boolQuery);
searchRequest.source(searchSourceBuilder);
List<SubjectDataVo> list = new ArrayList<>();
try {
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] hits = response.getHits().getHits();
if (hits != null && hits.length != 0) {
for (SearchHit hit : hits) {
String queryInfo = hit.getSourceAsString();
RepeatHold info = JSONUtil.toBean(queryInfo, RepeatHold.class);
map.put(info.getRepeatMark(), info.getSubjectId() + info.getArticleId());
}
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] searchHits = searchResponse.getHits().getHits();
for (SearchHit hit : searchHits) {
String queryInfo = hit.getSourceAsString();
SubjectDataVo info = JSONUtil.toBean(queryInfo, SubjectDataVo.class);
info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate()));
list.add(info);
}
} catch (Exception e) {
e.printStackTrace();
}
return map;
return list;
}
......@@ -444,37 +582,4 @@ public class EsService {
}
return null;
}
/**
 * Returns the origins of documents whose title matches {@code title} and that were
 * published after {@code publishDate}, taken from the 20 largest origin buckets in
 * descending order of document count.
 *
 * @param title       title to match
 * @param publishDate lower bound (exclusive) for the publish date
 * @return origin names; empty when the search failed
 */
public List<String> groupByOrigin(String title, String publishDate) {
    BoolQueryBuilder titleAfterDate = QueryBuilders.boolQuery()
            .must(QueryBuilders.multiMatchQuery(title, "title"))
            .filter(QueryBuilders.rangeQuery("publishDate").gt(EsDateUtil.esFieldDateFormat(publishDate)));
    TermsAggregationBuilder byOrigin = AggregationBuilders.terms("group_origin")
            .field("origin.keyword")
            .order(BucketOrder.count(false))
            .size(20);
    // size(0): only the aggregation is wanted, not the documents themselves
    SearchSourceBuilder source = new SearchSourceBuilder()
            .size(0)
            .query(titleAfterDate)
            .aggregation(byOrigin);
    SearchRequest request = new SearchRequest(Constants.COLLECT_INDEX);
    request.source(source);

    List<String> origins = new ArrayList<>();
    try {
        Terms originTerms = client.search(request, RequestOptions.DEFAULT)
                .getAggregations()
                .get("group_origin");
        for (Terms.Bucket originBucket : originTerms.getBuckets()) {
            origins.add(originBucket.getKeyAsString());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return origins;
}
}
......@@ -20,10 +20,11 @@ public interface EsStatisticsService {
* @param subjectId 专题id
* @param startTime 开始时间
* @param endTime 结束时间
* @param type 1-按小时;2-按天
* @author lkg
* @date 2024/1/25
*/
Map<String, Object> totalAndMax(String subjectId, String startTime, String endTime);
Map<String, String> totalAndMax(String subjectId, String startTime, String endTime, Integer type);
/**
......@@ -65,11 +66,12 @@ public interface EsStatisticsService {
*
* @param subjectId 专题id
* @param startTime 开始时间
* @param endTime 结束时间
* @param endTime 结束时间
* @param type 1-按小时;2-按天
* @author lkg
* @date 2024/4/10
*/
CountVO flowData(String subjectId, String startTime, String endTime);
List<CountVO> flowData(String subjectId, String startTime, String endTime, Integer type);
/**
* 时间段内事件的信息总数
......
......@@ -102,4 +102,12 @@ public interface IEventService extends IService<Event> {
* @date 2024/4/10
*/
List<EventExcelVO> frontList(List<String> eventIdList, Integer size);
/**
 * Lists model information.
 *
 * @return the model entries
 * @author lkg
 * @date 2024/4/11
 */
List<ModelVO> modelList();
}
......@@ -17,10 +17,7 @@ import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.TreeSet;
import java.util.*;
/**
* @author lkg
......@@ -59,12 +56,18 @@ public class AnalysisServiceImpl implements AnalysisService {
List<SubjectAnalysis> finalList = new ArrayList<>();
List<SubjectDataVo> dataList = esService.pageList(subjectId, null, null, Constants.FETCH_FIELDS_STATISTIC, 1,1, 15);
dataList.forEach(e -> {
String dataId = e.getId();
SubjectAnalysis subjectAnalysis = new SubjectAnalysis();
BeanUtils.copyProperties(e, subjectAnalysis);
subjectAnalysis.setPublishDate(DateUtil.stringToDate(e.getPublishDate(), "yyyy-MM-dd HH:mm:ss"));
//todo 重复数
// List<SubjectDataVo> subjectDataVoList = esService.dataById(subjectId, e.getId());
// subjectAnalysis.setRepeatNum(subjectDataVoList.size());
List<String> idList = new ArrayList<>();
idList.add(dataId);
Map<String, Integer> similarNumber = esService.getSimilarNumber(subjectId, idList);
Integer count = similarNumber.get(dataId);
if (count == null) {
count = 0;
}
subjectAnalysis.setRepeatNum(count);
finalList.add(subjectAnalysis);
});
list = finalList;
......
......@@ -48,30 +48,36 @@ public class EsStatisticsServiceImpl implements EsStatisticsService {
private LabelEntityService labelEntityService;
@Override
public Map<String, Object> totalAndMax(String subjectId, String startTime, String endTime) {
Map<String, Object> map = new HashMap<>();
public Map<String, String> totalAndMax(String subjectId, String startTime, String endTime, Integer type) {
Map<String, String> map = new HashMap<>();
SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT);
SearchSourceBuilder searchSourceBuilder = formatSourceBuilder(subjectId, null, startTime, endTime);
searchSourceBuilder.size(0);
searchSourceBuilder.trackTotalHits(true);
DateHistogramAggregationBuilder aggregation = AggregationBuilders.dateHistogram("group_hour")
.field("publishDate")
.calendarInterval(DateHistogramInterval.HOUR)
.format("yyyy-MM-dd HH")
.order(BucketOrder.count(false));
DateHistogramAggregationBuilder aggregation = AggregationBuilders.dateHistogram("group_hour").field("publishDate");
if (type == 1) {
aggregation.calendarInterval(DateHistogramInterval.HOUR)
.format("yyyy-MM-dd HH")
.order(BucketOrder.count(false));
} else if (type == 2) {
aggregation.calendarInterval(DateHistogramInterval.DAY)
.format("yyyy-MM-dd")
.order(BucketOrder.count(false));
}
searchSourceBuilder.aggregation(aggregation);
searchRequest.source(searchSourceBuilder);
try {
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
long value = response.getHits().getTotalHits().value;
map.put("totalCount", value);
map.put("totalCount", String.valueOf(value));
Aggregations aggregations = response.getAggregations();
ParsedDateHistogram groupHour = aggregations.get("group_hour");
List<? extends Histogram.Bucket> buckets = groupHour.getBuckets();
if (CollectionUtils.isNotEmpty(buckets)) {
Histogram.Bucket bucket = buckets.get(0);
long count = bucket.getDocCount();
map.put("max", count);
map.put("max", String.valueOf(count));
map.put("maxTime", bucket.getKeyAsString());
}
} catch (Exception e) {
e.printStackTrace();
......@@ -244,22 +250,23 @@ public class EsStatisticsServiceImpl implements EsStatisticsService {
}
@Override
public CountVO flowData(String subjectId, String startTime, String endTime) {
CountVO countVO = new CountVO();
public List<CountVO> flowData(String subjectId, String startTime, String endTime,Integer type) {
SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT);
SearchSourceBuilder searchSourceBuilder = formatSourceBuilder(subjectId, null, startTime, endTime);
searchSourceBuilder.size(0);
searchSourceBuilder.trackTotalHits(true);
DateHistogramAggregationBuilder aggregation = AggregationBuilders.dateHistogram("group_day")
.field("publishDate");
aggregation.calendarInterval(DateHistogramInterval.DAY).format("yyyy-MM-dd");
if (type == 1) {
aggregation.calendarInterval(DateHistogramInterval.HOUR).format("yyyy-MM-dd HH");
} else if (type == 2) {
aggregation.calendarInterval(DateHistogramInterval.DAY).format("yyyy-MM-dd");
}
searchSourceBuilder.aggregation(aggregation);
searchRequest.source(searchSourceBuilder);
List<CountVO> list = new ArrayList<>();
try {
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
long value = response.getHits().getTotalHits().value;
countVO.setValue(value);
List<CountVO> list = new ArrayList<>();
Aggregations aggregations = response.getAggregations();
ParsedDateHistogram groupHour = aggregations.get("group_day");
List<? extends Histogram.Bucket> buckets = groupHour.getBuckets();
......@@ -271,11 +278,10 @@ public class EsStatisticsServiceImpl implements EsStatisticsService {
list.add(vo);
}
}
countVO.setChildren(list);
} catch (Exception e) {
e.printStackTrace();
}
return countVO;
return list;
}
@Override
......@@ -311,7 +317,7 @@ public class EsStatisticsServiceImpl implements EsStatisticsService {
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
boolQuery.must(QueryBuilders.termsQuery("subjectId", Arrays.asList(subjectId.split(","))));
boolQuery.must(QueryBuilders.termsQuery("subjectId.keyword", Arrays.asList(subjectId.split(","))));
//标签id
if (StringUtils.isNotEmpty(relationId)) {
BoolQueryBuilder relationIdQuery = QueryBuilders.boolQuery();
......
......@@ -118,13 +118,15 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements
type = 1;
}
List<EventFrontVO> pageList = baseMapper.frontPageList(eventName, eventType,labelField,labelName,type,order,orderType, offset, pageSize);
//获取专题资讯的首发来源
Map<String, String> map = getFirstMap(pageList);
if (MapUtil.isNotEmpty(map)) {
pageList.forEach(e -> {
String firstOrigin = map.get(e.getId());
e.setFirstOrigin(firstOrigin);
});
if (CollectionUtils.isNotEmpty(pageList)) {
//获取专题资讯的首发来源
Map<String, String> map = getFirstMap(pageList);
if (MapUtil.isNotEmpty(map)) {
pageList.forEach(e -> {
String firstOrigin = map.get(e.getId());
e.setFirstOrigin(firstOrigin);
});
}
}
//获取总条数
Integer count = baseMapper.frontTotalCount(eventName, eventType,labelField,labelName,type);
......@@ -155,10 +157,12 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements
searchSourceBuilder.sort("publishDate", SortOrder.ASC);
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
boolQueryBuilder.must(QueryBuilders.termsQuery("subjectId.keyword", eventIdList));
// 聚合搜索
//聚合搜索
TermsAggregationBuilder one = AggregationBuilders.terms("one").field("subjectId.keyword").size(eventIdList.size());
//ES分组取每组第一条Java写法
TopHitsAggregationBuilder topHitsAggregationBuilder = AggregationBuilders.topHits("top_docs").size(1);
TopHitsAggregationBuilder topHitsAggregationBuilder = AggregationBuilders.topHits("top_docs")
.sort("publishDate",SortOrder.ASC)
.size(1);
one.subAggregation(topHitsAggregationBuilder);
searchSourceBuilder.aggregation(one);
searchSourceBuilder.query(boolQueryBuilder);
......@@ -263,6 +267,11 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements
}
/**
 * Returns the model list by delegating to {@code baseMapper.modelList()}.
 *
 * @return whatever the mapper query returns
 */
@Override
public List<ModelVO> modelList() {
return baseMapper.modelList();
}
/**
 * Returns the event rows by delegating to {@code baseMapper.frontList(eventIdList, size)}.
 *
 * @param eventIdList event ids passed through to the mapper
 * @param size        size argument passed through to the mapper
 * @return whatever the mapper query returns
 */
@Override
public List<EventExcelVO> frontList(List<String> eventIdList,Integer size) {
return baseMapper.frontList(eventIdList,size);
}
......
......@@ -172,11 +172,17 @@ public class AnalysisTask {
/**
 * Converts every entry of {@code dataList} into a {@link KafkaDataVo}, fills in its
 * repeat count and appends it to {@code kafkaDataVoList}.
 *
 * @param subjectId       subject id used for the repeat-count lookup
 * @param kafkaDataVoList output list the converted entries are appended to
 * @param dataList        source entries
 */
private void format(String subjectId, List<KafkaDataVo> kafkaDataVoList, List<SubjectDataVo> dataList) {
    for (SubjectDataVo source : dataList) {
        String articleId = source.getId();
        KafkaDataVo target = new KafkaDataVo();
        BeanUtils.copyProperties(source, target);

        // The repeat count is looked up one article at a time.
        List<String> singleId = new ArrayList<>();
        singleId.add(articleId);
        Integer found = esService.getSimilarNumber(subjectId, singleId).get(articleId);
        // No entry means the article has no recorded duplicates.
        Integer repeatNum = found != null ? found : Integer.valueOf(0);
        target.setRepeatNum(repeatNum);

        kafkaDataVoList.add(target);
    }
}
......
......@@ -20,7 +20,7 @@ import java.util.List;
import java.util.concurrent.CompletableFuture;
/**
* todo
*
*
* @author lkg
* @date 2024/4/10
......
......@@ -193,7 +193,6 @@ public class DateUtils extends PropertyEditorSupport {
try {
_date = sformat.parse(date);
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return sformat.format(_date);
......
......@@ -4,9 +4,7 @@ import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.common.Term;
import org.springframework.util.StringUtils;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
......@@ -61,7 +59,7 @@ public class HanlpUtil {
}
//去重
List<String> distinctList = phraseList.stream().distinct().collect(Collectors.toList());
Map<String, Integer> map = StringUtil.getHitWordsAndTimes(distinctList, text);
Map<String, Integer> map = getHitWordsAndTimes(distinctList, text);
//根据频次排序
List<Map.Entry<String, Integer>> list = SortUtil.sortMap(map);
if (limitNo > list.size()) {
......@@ -85,4 +83,54 @@ public class HanlpUtil {
Matcher m = p.matcher(str);
return m.find();
}
/**
 * Collects the entries of {@code srcList} that occur in {@code text}, together with
 * how often each one occurs.
 *
 * @param srcList candidate words; {@code null} yields an empty result
 * @param text    text to scan; {@code null} or empty yields an empty result
 * @return map from each word found at least once to its occurrence count
 * @author kongliufeng
 */
private static Map<String, Integer> getHitWordsAndTimes(Collection<String> srcList, String text) {
    Map<String, Integer> hits = new HashMap<>();
    boolean nothingToScan = srcList == null || StringUtils.isEmpty(text);
    if (!nothingToScan) {
        for (String candidate : srcList) {
            int times = countKeyWordInContent(candidate, text);
            if (times > 0) {
                hits.put(candidate, times);
            }
        }
    }
    return hits;
}
/**
 * Counts the non-overlapping occurrences of {@code keyword} in {@code srcContent},
 * scanning from left to right.
 *
 * @param keyword    word to look for; {@code null} or blank counts as zero occurrences
 * @param srcContent text to scan
 * @return number of occurrences
 * @author kongliufeng
 */
private static int countKeyWordInContent(String keyword, String srcContent) {
    if (keyword == null || keyword.trim().equals("")) {
        return 0;
    }
    int occurrences = 0;
    int from = 0;
    int hit = srcContent.indexOf(keyword, from);
    while (hit >= 0) {
        occurrences++;
        // Resume right after the match so that occurrences never overlap.
        from = hit + keyword.length();
        hit = srcContent.indexOf(keyword, from);
    }
    return occurrences;
}
}
......@@ -256,7 +256,6 @@ public class HttpUtil {
public static CloseableHttpResponse getProxyHttpClient(String url) {
//获取代理ip信息
//TODO
String proxyHost = "";
int proxyPort = 0;
......
......@@ -6,7 +6,7 @@ import java.util.*;
/**
* @author kongliufeng
* @Description TODO: 自定义排序
* @Description 自定义排序
* @create 2020-09-03 19:11
* @Version 1.0
*/
......
package com.zzsn.event.util;
import org.springframework.util.StringUtils;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers for flag conversion, whitespace and bracket trimming, full-width to
 * half-width conversion, simple list (de)serialisation, HTML entity clean-up and
 * keyword matching.
 *
 * <p>All non-ASCII characters are written as Unicode escapes so that source encoding or
 * text normalisation cannot silently fold full-width forms into their ASCII twins.
 */
public class StringUtil {

    /** Opening bracket to its closing peer, half-width and full-width forms. */
    private static final Map<Character, Character> OPEN_TO_CLOSE = new HashMap<>();

    /** Closing bracket to its opening peer; inverse of {@link #OPEN_TO_CLOSE}. */
    private static final Map<Character, Character> CLOSE_TO_OPEN = new HashMap<>();

    static {
        char[][] pairs = {
                {'\u3010', '\u3011'}, // lenticular brackets U+3010 / U+3011
                {'[', ']'},
                {'\uFF3B', '\uFF3D'}, // full-width square brackets U+FF3B / U+FF3D
                {'(', ')'},
                {'\uFF08', '\uFF09'}, // full-width parentheses U+FF08 / U+FF09
                {'\u3014', '\u3015'}, // tortoise-shell brackets U+3014 / U+3015
        };
        for (char[] pair : pairs) {
            OPEN_TO_CLOSE.put(pair[0], pair[1]);
            CLOSE_TO_OPEN.put(pair[1], pair[0]);
        }
    }

    /**
     * Literal replacements applied in order by {@link #normalizeHtmlTransf}. The
     * ampersand entity comes last so that an escaped entity is decoded exactly once.
     */
    private static final String[][] HTML_REPLACEMENTS = {
            {"&bull;", "\u00B7"},
            {"&middot;", "\u00B7"},
            {"&nbsp;", " "},
            {"&quot;", "\""},
            {"\u30FB", "\u00B7"}, // katakana middle dot U+30FB
            {"\uFF65", "\u00B7"}, // half-width katakana middle dot U+FF65
            {"&ldquo;", "\""},
            {"&rdquo;", "\""},
            {"&hellip;", "..."},
            {"&lt;", "<"},
            {"&gt;", ">"},
            {"&mdash;", "\u2014"},
            {"&ndash;", "\u2013"},
            {"&tilde;", "~"},
            {"&lsquo;", "'"},
            {"&rsquo;", "'"},
            {"&sbquo;", ","},
            {"&lsaquo;", "\u2039"},
            {"&rsaquo;", "\u203A"},
            {"&amp;", "&"},
    };

    /** Grouping parentheses (half- and full-width) removed from compose-word terms. */
    private static final Pattern GROUPING_PARENS = Pattern.compile("[()\uFF08\uFF09]");

    /**
     * Converts the flag strings {@code "0"} and {@code "1"} to a boolean.
     *
     * @param s flag string, may be {@code null}
     * @param b value returned for {@code null} or any other string
     * @return {@code false} for "0", {@code true} for "1", otherwise {@code b}
     */
    public static boolean convertBoolean(String s, boolean b) {
        if ("0".equals(s)) {
            return false;
        }
        if ("1".equals(s)) {
            return true;
        }
        return b;
    }

    /**
     * Converts a boolean to its flag string.
     *
     * @return "1" for {@code true}, "0" for {@code false}
     */
    public static String convertBooleanToString(boolean b) {
        return b ? "1" : "0";
    }

    /**
     * Turns every whitespace character into a space and trims both ends.
     *
     * @param str text to clean, may be {@code null}
     * @return the cleaned text, or {@code null} for {@code null} input
     */
    public static String trimWhiteSpace(String str) {
        if (str == null) {
            return null;
        }
        return replaceBlank(str).trim();
    }

    /**
     * Replaces every whitespace character (per {@link Character#isWhitespace(char)})
     * with a plain space; the length of the text is unchanged.
     *
     * @param str text to clean, may be {@code null}
     * @return the cleaned text, or {@code null} for {@code null} input
     */
    public static String replaceBlank(String str) {
        if (str == null) {
            return null;
        }
        StringBuilder cleaned = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            cleaned.append(Character.isWhitespace(c) ? ' ' : c);
        }
        return cleaned.toString();
    }

    /**
     * Extracts the substrings enclosed by a start and an end delimiter, for example
     * {@code aa[abc]bbb} yields {@code abc}.
     *
     * @param str   text to scan; {@code null} or blank yields an empty list
     * @param start start delimiter, inserted into a regular expression verbatim, so
     *              the caller must escape metacharacters (for example {@code "\\["})
     * @param end   end delimiter, same escaping rule as {@code start}
     * @return the enclosed substrings in order of appearance
     */
    public static List<String> getSubStrs(String str, String start, String end) {
        List<String> found = new ArrayList<>();
        if (str == null || str.trim().length() == 0) {
            return found;
        }
        Pattern pattern = Pattern.compile(String.format("%s([^%s%s]+)%s", start, start, end, end));
        Matcher matcher = pattern.matcher(str);
        while (matcher.find()) {
            found.add(matcher.group(1));
        }
        return found;
    }

    /**
     * Returns the part of {@code fromStr} that stands where {@code replaceStr} stands
     * in {@code origStr}. Example: fromStr {@code aaa123bb}, origStr {@code aaa[xxx]bb},
     * replaceStr {@code [xxx]} gives {@code 123}.
     *
     * @param replaceStr placeholder contained in {@code origStr}
     * @param origStr    template text
     * @param fromStr    concrete text following the template
     * @return the corresponding part, or {@code null} when the placeholder is absent
     *         from the template or {@code fromStr} does not fit the template
     */
    public static String getHomologousWord(String replaceStr,
                                           String origStr, String fromStr) {
        int pos = origStr.indexOf(replaceStr);
        if (pos == -1) {
            return null;
        }
        String prefix = origStr.substring(0, pos);
        String suffix = origStr.substring(pos + replaceStr.length());
        if (prefix.length() > 0 && !fromStr.startsWith(prefix)) {
            return null;
        }
        if (suffix.length() > 0 && !fromStr.endsWith(suffix)) {
            return null;
        }
        // Prefix and suffix may overlap inside a short fromStr (for example "aba"
        // against "ab...ba"); such a text does not fit the template.
        if (fromStr.length() < prefix.length() + suffix.length()) {
            return null;
        }
        return fromStr.substring(prefix.length(), fromStr.length() - suffix.length());
    }

    /**
     * Removes leading bracketed segments, for example {@code [x](y)z} becomes {@code z}.
     * Stripping stops at the first character that is not an opening bracket or whose
     * closing peer is missing.
     *
     * @param s text to trim, may be {@code null}
     * @return the trimmed text; {@code null} or empty input is returned unchanged
     */
    public static String trimBeginningBracket(String s) {
        if (s == null || s.isEmpty()) {
            return s;
        }
        String rest = s;
        while (!rest.isEmpty()) {
            Character closing = OPEN_TO_CLOSE.get(rest.charAt(0));
            if (closing == null) {
                break;
            }
            int closePos = rest.indexOf(closing.charValue(), 1);
            if (closePos < 0) {
                break;
            }
            rest = rest.substring(closePos + 1);
        }
        return rest;
    }

    /**
     * Removes every bracketed segment anywhere in the text. An opening bracket without
     * a closing peer is dropped (all its occurrences), and stray closing brackets are
     * dropped at the end.
     *
     * @param s text to trim, may be {@code null}
     * @return the trimmed text; {@code null} or empty input is returned unchanged
     */
    public static String trimMiddleBracket(String s) {
        if (s == null || s.isEmpty()) {
            return s;
        }
        String remaining = s;
        int from = 0;
        while (true) {
            int open = indexOfOpeningBracket(remaining, from);
            if (open < 0) {
                break;
            }
            char opening = remaining.charAt(open);
            char closing = OPEN_TO_CLOSE.get(opening);
            int close = remaining.indexOf(closing, open + 1);
            if (close >= 0) {
                remaining = remaining.substring(0, open) + remaining.substring(close + 1);
            } else {
                // Unbalanced opener: drop the character itself and continue from there.
                remaining = remaining.replace(String.valueOf(opening), "");
                from = open;
            }
        }
        for (Character closing : CLOSE_TO_OPEN.keySet()) {
            remaining = remaining.replace(String.valueOf(closing), "");
        }
        return remaining;
    }

    /** Returns the index of the first opening bracket at or after {@code from}, or -1. */
    private static int indexOfOpeningBracket(String text, int from) {
        for (int i = from; i < text.length(); i++) {
            if (OPEN_TO_CLOSE.containsKey(text.charAt(i))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Removes trailing bracketed segments, for example {@code z(y)[x]} becomes {@code z}.
     * Stripping stops at the first trailing character that is not a closing bracket or
     * whose opening peer is missing.
     *
     * @param s text to trim, may be {@code null}
     * @return the trimmed text; {@code null} or empty input is returned unchanged
     */
    public static String trimEnddingBracket(String s) {
        if (s == null || s.isEmpty()) {
            return s;
        }
        String rest = s;
        while (!rest.isEmpty()) {
            Character opening = CLOSE_TO_OPEN.get(rest.charAt(rest.length() - 1));
            if (opening == null) {
                break;
            }
            // Search backwards, starting just before the closing bracket.
            int openPos = rest.lastIndexOf(opening.charValue(), rest.length() - 2);
            if (openPos < 0) {
                break;
            }
            rest = rest.substring(0, openPos);
        }
        return rest;
    }

    /**
     * Keeps only the characters for which {@link #isChinese(char)} is true.
     *
     * @param s text to filter, may be {@code null}
     * @return the filtered text, or {@code null} for {@code null} input
     */
    public static String delCharNotChinese(String s) {
        if (s == null) {
            return null;
        }
        StringBuilder kept = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (isChinese(c)) {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /**
     * Tells whether the character lies in the range U+4E00 to U+9FA5.
     */
    public static boolean isChinese(char c) {
        return c >= 0x4e00 && c <= 0x9fa5;
    }

    /**
     * Converts full-width characters (code points 65281 to 65374) to their half-width
     * counterparts and the ideographic space (12288) to a plain space.
     *
     * @param s text to convert, may be {@code null}
     * @return the converted text; {@code null} or empty input is returned unchanged
     */
    public static String toBanjiao(String s) {
        if (s == null || s.length() == 0) {
            return s;
        }
        StringBuilder converted = new StringBuilder(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c >= 65281 && c <= 65374) {
                c = (char) (c - 65248);
            } else if (c == 12288) { // ideographic space
                c = (char) 32;
            }
            converted.append(c);
        }
        return converted.toString();
    }

    /**
     * Joins the elements with {@code ";"}.
     *
     * @param arr elements to join, may be {@code null}
     * @return the joined text; empty for a {@code null} or empty list
     */
    public static String listToString(List<String> arr) {
        if (arr == null) {
            return "";
        }
        return String.join(";", arr);
    }

    /**
     * Splits the text on {@code ";"} and drops blank parts.
     *
     * @param str text to split, may be {@code null}
     * @return the non-blank parts, untrimmed; empty for {@code null} input
     */
    public static List<String> stringToList(String str) {
        List<String> parts = new ArrayList<>();
        if (str == null) {
            return parts;
        }
        for (String part : str.split(";")) {
            if (part.trim().length() == 0) {
                continue;
            }
            parts.add(part);
        }
        return parts;
    }

    /**
     * Replaces a fixed set of HTML entities (and the katakana middle dot) with plain
     * characters. The ampersand entity is decoded last, so an escaped entity such as
     * {@code &amp;lt;} becomes {@code &lt;} rather than being decoded twice.
     *
     * @param s text to normalise, may be {@code null}
     * @return the normalised text, or {@code null} for {@code null} input
     */
    public static String normalizeHtmlTransf(String s) {
        if (s == null) {
            return null;
        }
        String ret = s;
        for (String[] replacement : HTML_REPLACEMENTS) {
            ret = ret.replace(replacement[0], replacement[1]);
        }
        return ret;
    }

    /**
     * Flattens segment separators: the sequence CR LF semicolon becomes a space, line
     * feeds are removed and vertical bars become spaces.
     *
     * @param s text to normalise, may be {@code null}
     * @return the normalised text, or {@code null} for {@code null} input
     */
    public static String normalizeSegTransf(String s) {
        if (s == null) {
            return null;
        }
        // NOTE(review): the first pattern includes a trailing ';' -- presumably a typo
        // for a plain CR LF; kept as is, confirm against callers before changing.
        String ret = s.replace("\r\n;", " ");
        ret = ret.replace("\n", "");
        ret = ret.replace("|", " ");
        return ret;
    }

    /**
     * Collects the entries of {@code srcList} that occur in {@code text}, together
     * with how often each one occurs.
     *
     * @param srcList candidate words; {@code null} yields an empty result
     * @param text    text to scan; {@code null} or empty yields an empty result
     * @return map from each word found at least once to its occurrence count
     * @author kongliufeng
     */
    public static Map<String, Integer> getHitWordsAndTimes(Collection<String> srcList, String text) {
        Map<String, Integer> hits = new HashMap<>();
        if (srcList == null || text == null || text.isEmpty()) {
            return hits;
        }
        for (String word : srcList) {
            int times = countKeyWordInContent(word, text);
            if (times > 0) {
                hits.put(word, times);
            }
        }
        return hits;
    }

    /**
     * Tells whether a word occurs in a text.
     *
     * @param keyWord word to look for
     * @param text    text to scan
     * @return {@code true} when the word occurs; {@code false} when either argument is
     *         {@code null} or the word is empty
     * @author kongliufeng
     */
    public static Boolean isKeyWordInText(String keyWord, String text) {
        if (keyWord == null || text == null || keyWord.isEmpty()) {
            return false;
        }
        return text.contains(keyWord);
    }

    /**
     * Counts the non-overlapping occurrences of a word in a text, scanning from left
     * to right.
     *
     * @param keyword    word to look for; {@code null} or blank counts as zero
     * @param srcContent text to scan; {@code null} counts as zero
     * @return number of occurrences
     * @author kongliufeng
     */
    public static int countKeyWordInContent(String keyword, String srcContent) {
        if (keyword == null || keyword.trim().equals("") || srcContent == null) {
            return 0;
        }
        int count = 0;
        int from = 0;
        int hit = srcContent.indexOf(keyword, from);
        while (hit >= 0) {
            count++;
            // Resume right after the match so that occurrences never overlap.
            from = hit + keyword.length();
            hit = srcContent.indexOf(keyword, from);
        }
        return count;
    }

    /**
     * Matches the text against user-defined compose words and returns the words that
     * matched.
     *
     * <p>Groups are separated by {@code ;} and the first matching group wins. Inside a
     * group, terms are joined by {@code +} and all of them must match. A term may list
     * alternatives separated by {@code |}, optionally wrapped in parentheses; the first
     * alternative found in the text is the match. Empty alternatives and groups without
     * any term never match.
     *
     * @param content      text to scan
     * @param composeWords compose-word expression
     * @return the matched words of the first matching group, each followed by a comma,
     *         or {@code null} when nothing matches or an argument is {@code null}
     * @author kongliufeng
     */
    public static String matchComposeWords(String content, String composeWords) {
        if (content == null || composeWords == null) {
            return null;
        }
        for (String group : composeWords.split(";")) {
            String matched = matchGroup(content, group);
            if (matched != null) {
                return matched;
            }
        }
        return null;
    }

    /** Returns the comma-terminated matches of one group, or null unless every term matches. */
    private static String matchGroup(String content, String group) {
        StringBuilder matched = new StringBuilder();
        for (String term : group.split("\\+")) {
            String hit = firstAlternativeIn(content, term);
            if (hit == null) {
                return null;
            }
            matched.append(hit).append(",");
        }
        return matched.length() == 0 ? null : matched.toString();
    }

    /** Returns the first non-empty alternative of the term found in the text, or null. */
    private static String firstAlternativeIn(String content, String term) {
        String[] alternatives = GROUPING_PARENS.matcher(term).replaceAll("").split("\\|");
        for (String alternative : alternatives) {
            if (!alternative.isEmpty() && content.contains(alternative)) {
                return alternative;
            }
        }
        return null;
    }
}
package com.zzsn.event.util;
import com.baomidou.mybatisplus.core.toolkit.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/*
*
* Utility:一些工具函数的集合,
* ver:2014.04.03
* ver:2014.03.26
* ver: 2014.03.14
* ver: 2014.3.11
* ver:2014.03.09
* ver:2014.03.05
* ver: 2014.03.04
* ver: 2014.02.19
* ver: 2013.11.19
* ver: 2013.10.19
* ver: 2013.09.24
* ver: 2013.09.20
*
*/
@SuppressWarnings("deprecation")
public class Utility {
// Timer control flag.
public static int flg = 0;
// Task execution status flag.
public static int status_flg = 0;
// --- Character-class patterns -------------------------------------------------
// One CJK ideograph in the range U+4E00..U+9FA5.
static String regEx = "[\\u4e00-\\u9fa5]";
static Pattern patChi = Pattern.compile(regEx);
// One ASCII letter or one CJK ideograph.
static String regExAll = "[a-zA-Z\\u4e00-\\u9fa5]";
static Pattern patWord = Pattern.compile(regExAll);
// One character that is neither an ASCII letter nor a CJK ideograph.
static String regUnExAll = "[^a-zA-Z\\u4e00-\\u9fa5]";
static Pattern patUnWord = Pattern.compile(regUnExAll);
// One ASCII digit, ASCII letter or CJK ideograph.
public static Pattern patWordAndNum = Pattern.compile("[0-9a-zA-Z\\u4e00-\\u9fa5]");
// Word -> stem lookup table; stays null until stemming() loads it from disk.
static HashMap<String, String> stemMap = null;
// Decimal numeric character references with 4 or 5 digits, e.g. "&#8226;".
static String regHTMLNumcode = "&#(\\d{4,5});";
static Pattern patHTMLNumCode = Pattern.compile(regHTMLNumcode);
// --- HTML tag patterns (all case-insensitive) ----------------------------------
// Naming used below: "...P" matches an opening tag, "...RP" the closing tag and
// "...RemoveP" a whole element including its content.
// Example of a hidden div matched by divNoneP:
//<div id="ctl00_PlaceHolderMain_ctl01_ctl05_label" style="display:none">Page Content</div>
static Pattern divNoneP = Pattern.compile("(?s)<div[^>]*display:none[^>]*>.*?</div>", Pattern.CASE_INSENSITIVE);
// Note: divP, strongP and pP only match the bare, attribute-free form of the tag.
static Pattern divP = Pattern.compile("<div>", Pattern.CASE_INSENSITIVE);
static Pattern divRP = Pattern.compile("</div>", Pattern.CASE_INSENSITIVE);
// Three spellings of the line-break tag: "<br />", "<br/>" and "<br>".
static Pattern brP = Pattern.compile("<br />", Pattern.CASE_INSENSITIVE);
static Pattern brP2 = Pattern.compile("<br/>", Pattern.CASE_INSENSITIVE);
static Pattern br2P = Pattern.compile("<br>", Pattern.CASE_INSENSITIVE);
static Pattern spaceP = Pattern.compile("&nbsp;", Pattern.CASE_INSENSITIVE);
static Pattern strongP = Pattern.compile("<strong>", Pattern.CASE_INSENSITIVE);
static Pattern strongRP = Pattern.compile("</strong>", Pattern.CASE_INSENSITIVE);
static Pattern pP = Pattern.compile("<p>", Pattern.CASE_INSENSITIVE);
static Pattern pRP = Pattern.compile("</p>", Pattern.CASE_INSENSITIVE);
static Pattern centerP = Pattern.compile("<center[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern centerRP = Pattern.compile("</center>", Pattern.CASE_INSENSITIVE);
// Captures the tag name of any opening tag so attributes can be dropped via "<$1>".
static Pattern removeAttrP = Pattern.compile("<([a-zA-Z0-9]+)[^>]*>", Pattern.CASE_INSENSITIVE);
// Spans from a "<!--...>" opener to a later "<!...-->" closer (conditional-comment style).
static Pattern commentP = Pattern.compile("(?s)<!--[^>]*>.*?<![^>]*-->", Pattern.CASE_INSENSITIVE);
static Pattern inputP = Pattern.compile("<input[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern formP = Pattern.compile("<form[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern formRP = Pattern.compile("</form>", Pattern.CASE_INSENSITIVE);
static Pattern buttonP = Pattern.compile("(?s)<button[^>]*>.*?</button>", Pattern.CASE_INSENSITIVE);
static Pattern iframeP = Pattern.compile("(?s)<iframe[^>]*>.*?</iframe>", Pattern.CASE_INSENSITIVE);
static Pattern noscriptP = Pattern.compile("(?s)<noscript>.*?</noscript>", Pattern.CASE_INSENSITIVE);
static Pattern objectP = Pattern.compile("(?s)<object[^>]*>.*?</object>", Pattern.CASE_INSENSITIVE);
static Pattern linkP = Pattern.compile("(?s)<link[^>]*>", Pattern.CASE_INSENSITIVE);
// imgReplaceP / imgRevReplaceP are used as a pair: <img ...> is renamed to <_img ...>
// so that removeAttrP leaves its attributes alone, then renamed back.
static Pattern imgReplaceP = Pattern.compile("<img([^>]*)>", Pattern.CASE_INSENSITIVE);
static Pattern imgRevReplaceP = Pattern.compile("<_img([^>]*)>", Pattern.CASE_INSENSITIVE);
static Pattern imgP = Pattern.compile("<img[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern imgRP = Pattern.compile("</img>", Pattern.CASE_INSENSITIVE);
public static Pattern aRemoveP = Pattern.compile("(?s)<a[^>]*>.*?</a>", Pattern.CASE_INSENSITIVE);
static Pattern legendRemoveP = Pattern.compile("(?s)<legend[^>]*>.*?</legend>", Pattern.CASE_INSENSITIVE);
// NOTE(review): "<a[^>]*>" also matches any other tag whose name starts with "a"
// (e.g. <abbr>, <article>); confirm whether that is intended.
static Pattern aP = Pattern.compile("<a[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern aRP = Pattern.compile("</a>", Pattern.CASE_INSENSITIVE);
static Pattern fontP = Pattern.compile("<font[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern fontRP = Pattern.compile("</font>", Pattern.CASE_INSENSITIVE);
// Heading tags <h0>..<h9> (any single digit).
static Pattern hP = Pattern.compile("<h\\d[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern hRP = Pattern.compile("</h\\d>", Pattern.CASE_INSENSITIVE);
static Pattern ulRP = Pattern.compile("</ul>", Pattern.CASE_INSENSITIVE);
static Pattern liRP = Pattern.compile("</li>", Pattern.CASE_INSENSITIVE);
static Pattern trRP = Pattern.compile("</tr>", Pattern.CASE_INSENSITIVE);
static Pattern tdRP = Pattern.compile("</td>", Pattern.CASE_INSENSITIVE);
static Pattern textareaRemoveP = Pattern.compile("(?s)<textarea[^>]*>.*?</textarea>", Pattern.CASE_INSENSITIVE);
static Pattern selectRemoveP = Pattern.compile("(?s)<select[^>]*>.*?</select>", Pattern.CASE_INSENSITIVE);
static Pattern optionRemoveP = Pattern.compile("(?s)<option[^>]*>.*?</option>", Pattern.CASE_INSENSITIVE);
static Pattern labelRemoveP = Pattern.compile("(?s)<label[^>]*>.*?</label>", Pattern.CASE_INSENSITIVE);
// --- Date recognition -----------------------------------------------------------
// Textual date shapes; presumably patDateN pairs with the formatterN* instances
// below (the code using them is outside this part of the file - TODO confirm).
private static Pattern patDate0 = Pattern.compile("\\d+-\\d{1,2}-\\d+");
private static Pattern patDate1 = Pattern.compile("\\d+[-\\s/年月日]\\d{1,2}-\\d+", Pattern.CASE_INSENSITIVE);
private static Pattern patDate2 = Pattern.compile("\\d+\\s+[A-Z][a-z]+\\s+\\d+");
private static Pattern patDate3 = Pattern.compile("[A-Z][a-z\\.]+\\s+\\d{1,2},\\s+\\d+");
private static Pattern patDate4 = Pattern.compile("\\d+年\\d+月\\d+日");
private static Pattern patDate5 = Pattern.compile("\\d+/\\d{1,2}/\\d+");
private static Pattern patDate6 = Pattern.compile("\\d+\\.\\d+\\.\\d+");
private static Pattern patDate7 = Pattern.compile("\\d{1,2}-\\d{1,2}");
private static Pattern patDate8 = Pattern.compile("\\d+月\\d+日");
// NOTE(review): SimpleDateFormat is documented as not thread-safe; these shared
// static instances need external synchronization if used from several threads.
private static SimpleDateFormat formatter0 = new SimpleDateFormat("yyyy-MM-dd");
private static SimpleDateFormat formatter0_1 = new SimpleDateFormat("yy-MM-dd");
private static SimpleDateFormat formatter2 = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH);
private static SimpleDateFormat formatter3_1 = new SimpleDateFormat("MMM dd, yyyy", Locale.ENGLISH);
private static SimpleDateFormat formatter3_2 = new SimpleDateFormat("MMM. dd, yyyy", Locale.ENGLISH);
private static SimpleDateFormat formatter4 = new SimpleDateFormat("yyyy年MM月dd");
private static SimpleDateFormat formatter5_1 = new SimpleDateFormat("yyyy/MM/dd");
private static SimpleDateFormat formatter5_2 = new SimpleDateFormat("dd/MM/yyyy");
private static SimpleDateFormat formatter5_4 = new SimpleDateFormat("yy/MM/dd");
private static SimpleDateFormat formatter5_3 = new SimpleDateFormat("dd/MM/yy");
private static SimpleDateFormat formatter6 = new SimpleDateFormat("yyyy.MM.dd");
private static SimpleDateFormat formatter7 = new SimpleDateFormat("MM-dd");
private static SimpleDateFormat formatter8 = new SimpleDateFormat("MM月dd");
// Initially null; where it is assigned is not visible in this part of the file.
private static Date thresholdDate = null;
/**
 * Detects the character encoding declared in the {@code <meta>} tags of an HTML page.
 *
 * <p>The first {@code <meta>} tag carrying a {@code charset=...} declaration decides the
 * result. The value may be unquoted, double-quoted or single-quoted, and the attribute
 * name is matched case-insensitively. A declaration with an empty value yields the
 * historical default {@code "GB2312"}.
 *
 * @param content HTML source text; may be {@code null}
 * @return the declared encoding name, {@code "GB2312"} for an empty declaration, or
 *         {@code null} when {@code content} is {@code null} or declares no charset
 */
public static String getWebEncodingByStr(String content) {
    if (content == null) {
        return null;
    }
    // Compiled once per call rather than once per <meta> tag.
    Pattern metaTag = Pattern.compile("<meta[^>]*>", Pattern.CASE_INSENSITIVE);
    // Group 1 is the value up to the next whitespace, quote, ';', '>' or '|'.
    Pattern charsetAttr = Pattern.compile(
            "charset\\s*=\\s*[\"']?([^\\s\"';>|]*)", Pattern.CASE_INSENSITIVE);
    Matcher tags = metaTag.matcher(content);
    while (tags.find()) {
        Matcher declaration = charsetAttr.matcher(tags.group());
        if (declaration.find()) {
            String encoding = declaration.group(1).trim();
            // An empty declaration falls back to the legacy default.
            return encoding.isEmpty() ? "GB2312" : encoding;
        }
        // A tag that merely mentions "charset" without "=" is skipped; the previous
        // fixed-offset substring(8) threw StringIndexOutOfBoundsException here.
    }
    return null;
}
/**
 * Legacy tag stripper: turns block-level tags into line breaks, decodes
 * {@code &nbsp;} and the bullet reference {@code &#8226;}, then removes every
 * remaining tag.
 *
 * <p>Matching of the listed tags is case-sensitive and only covers the all-lowercase
 * and all-uppercase spellings. The bullet reference is now replaced by the real bullet
 * character (U+2022); the previous replacement literal was the mis-encoded text
 * {@code "??"}.
 *
 * @param src HTML text; must not be {@code null}
 * @return the stripped text with leading/trailing whitespace trimmed
 */
public static String RemoveHTMLCode_old(String src) {
    final String gap = "\n\n";
    // {regex, replacement} pairs, applied strictly in this order.
    final String[][] steps = {
        {"<DIV>", gap}, {"</DIV>", gap}, {"<div>", ""}, {"</div>", gap},
        {"<BR>", gap}, {"<br>", gap}, {"<br />", gap}, {"<BR />", gap},
        {"&nbsp;", " "},
        {"<DIV>", ""}, {"<div>", ""},
        {"&#8226;", "\u2022"},
        {"<STRONG>", ""}, {"</STRONG>", ""}, {"<strong>", ""}, {"</strong>", ""},
        {"</p>", gap}, {"</P>", gap}, {"<P>", gap}, {"<p>", gap},
        {"<a[^>]*>", ""}, {"<img[^>]*>", ""}, {"</a>", ""},
        {"<font[^>]*>", ""}, {"</font>", ""}, {"<FONT[^>]*>", ""}, {"</FONT>", ""},
        {"</h\\d>", gap}, {"</H\\d>", gap},
        {"</ul>", gap}, {"</UL>", gap}, {"</li>", gap}, {"</LI>", gap},
        // Table rows end with a single line break only.
        {"</tr>", "\n"}, {"</TR>", "\n"},
        // Finally drop whatever tags are left.
        {"<[^>]*>", ""},
    };
    for (String[] step : steps) {
        src = src.replaceAll(step[0], step[1]);
    }
    return src.trim();
}
/**
 * Strips HTML markup but keeps images, links and table structure.
 *
 * <p>Block-level tags become CRLF line breaks, {@code &nbsp;} becomes a space and the
 * bullet reference {@code &#8226;} becomes the bullet character (U+2022; the previous
 * replacement literal was the mis-encoded text {@code "??"}). All remaining tags are
 * removed except {@code <img>}, {@code <a>}, {@code </a>} and the
 * table/tbody/tr/td open and close tags. Every {@code <img} is moved onto its own line.
 *
 * @param src HTML text; must not be {@code null}
 * @return the reduced markup, trimmed
 */
public static String RemoveHTMLCodeWithImg(String src) {
    final String lineBreak = "\r\n";
    // Remove whitespace between directly adjacent tags.
    src = src.replaceAll("(<[^>]*>)\\s*(<[^>]*>)", "$1$2");
    for (Pattern tag : new Pattern[] {divP, divRP, brP, br2P, brP2}) {
        src = tag.matcher(src).replaceAll(lineBreak);
    }
    src = spaceP.matcher(src).replaceAll(" ");
    src = src.replace("&#8226;", "\u2022");
    for (Pattern tag : new Pattern[] {strongP, strongRP}) {
        src = tag.matcher(src).replaceAll("");
    }
    for (Pattern tag : new Pattern[] {pP, pRP}) {
        src = tag.matcher(src).replaceAll(lineBreak);
    }
    for (Pattern tag : new Pattern[] {fontP, fontRP}) {
        src = tag.matcher(src).replaceAll("");
    }
    for (Pattern tag : new Pattern[] {hRP, ulRP, liRP}) {
        src = tag.matcher(src).replaceAll(lineBreak);
    }
    // Drop every tag that is not on the keep-list (the negative lookahead).
    src = src.replaceAll("(?!(<img[^>]*>|<a[^>]*>|</a>|<table[^>]*>|</table>|<tbody[^>]*>|</tbody>|<tr[^>]*>|</tr>|<td[^>]*>|</td>))(<[^>]*>)", "");
    src = src.replaceAll("<img", "\r\n<img");
    return src.trim();
}
/**
 * Strips all HTML markup from a fragment, leaving plain text.
 *
 * <p>Block-level tags (div, br, p, closing h/ul/li/tr/td) become a blank line
 * ({@code "\n\n"}), {@code &nbsp;} becomes a space and the bullet reference
 * {@code &#8226;} becomes the bullet character (U+2022; the previous replacement
 * literal was the mis-encoded text {@code "??"}). Every other tag is removed.
 *
 * @param src HTML text; must not be {@code null}
 * @return the plain text, trimmed
 */
public static String RemoveHTMLCode(String src) {
    final String blankLine = "\n\n";
    // Remove whitespace between directly adjacent tags.
    src = src.replaceAll("(<[^>]*>)\\s*(<[^>]*>)", "$1$2");
    for (Pattern tag : new Pattern[] {divP, divRP, brP, brP2, br2P}) {
        src = tag.matcher(src).replaceAll(blankLine);
    }
    src = spaceP.matcher(src).replaceAll(" ");
    src = src.replace("&#8226;", "\u2022");
    for (Pattern tag : new Pattern[] {strongP, strongRP}) {
        src = tag.matcher(src).replaceAll("");
    }
    for (Pattern tag : new Pattern[] {pP, pRP}) {
        src = tag.matcher(src).replaceAll(blankLine);
    }
    for (Pattern tag : new Pattern[] {aP, aRP, imgP, fontP, fontRP}) {
        src = tag.matcher(src).replaceAll("");
    }
    for (Pattern tag : new Pattern[] {hRP, ulRP, liRP, trRP, tdRP}) {
        src = tag.matcher(src).replaceAll(blankLine);
    }
    // Whatever markup is still left goes away entirely.
    src = src.replaceAll("<[^>]*>", "");
    return src.trim();
}
/**
 * Decodes a fixed set of named HTML entities and the decimal character references
 * matched by {@code patHTMLNumCode}, and removes {@code <?...>} processing instructions.
 *
 * <p>Changes compared with the previous implementation:
 * <ul>
 *   <li>{@code &raquo;}, {@code &ccedil;} and {@code &atilde;} decode to their real
 *       characters; the old replacement literals were the mis-encoded text "??".</li>
 *   <li>{@code &amp;} is decoded last, so already-escaped text such as
 *       {@code &amp;lt;} becomes {@code &lt;} and is no longer decoded twice.</li>
 *   <li>Numeric references are decoded in one pass and as full code points, so values
 *       above U+FFFF are no longer truncated by a {@code char} cast.</li>
 * </ul>
 *
 * @param str text containing HTML entities; must not be {@code null}
 * @return the decoded text
 */
public static String HTMLDecode(String str) {
    // Named entities, everything except &amp; (handled at the very end).
    final String[][] namedEntities = {
        {"&quot;", "\""},
        {"&nbsp;", " "},
        {"&middot;", "\u00B7"},
        {"&ldquo;", "\u201C"},
        {"&rdquo;", "\u201D"},
        {"&gt;", ">"},
        {"&lt;", "<"},
        {"&raquo;", "\u00BB"},
        {"&times;", "\u00D7"},
        {"&ccedil;", "\u00E7"},
        {"&atilde;", "\u00E3"},
        {"&ecirc;", "\u00EA"},
    };
    for (String[] entity : namedEntities) {
        str = str.replace(entity[0], entity[1]);
    }
    // Strip processing instructions such as <?xml ... >.
    str = str.replaceAll("<\\?[^>]*>", "");
    // Decimal character references, decoded in a single left-to-right pass.
    Matcher numeric = patHTMLNumCode.matcher(str);
    StringBuffer decoded = new StringBuffer(str.length());
    while (numeric.find()) {
        int codePoint = Integer.parseInt(numeric.group(1));
        String character = new String(Character.toChars(codePoint));
        // quoteReplacement keeps '$' and '\' in the replacement literal.
        numeric.appendReplacement(decoded, Matcher.quoteReplacement(character));
    }
    numeric.appendTail(decoded);
    // &amp; last: an '&' produced here must not start another entity.
    return decoded.toString().replace("&amp;", "&");
}
/**
 * Removes form controls from an HTML fragment: whole textarea, select, option, label
 * and button elements, plus the bare input and form tags.
 *
 * @param htmlText HTML text; must not be {@code null}
 * @return the text without the form-control markup
 */
public static String RemoveHTMLControl(String htmlText) {
    // Applied in this order; each match is replaced by the empty string.
    final Pattern[] controls = {
        textareaRemoveP, selectRemoveP, optionRemoveP, labelRemoveP,
        inputP, formP, buttonP, formRP,
    };
    String cleaned = htmlText;
    for (Pattern control : controls) {
        cleaned = control.matcher(cleaned).replaceAll("");
    }
    return cleaned;
}
/**
 * Removes non-content markup: script and style elements (paired and self-closing),
 * noscript and object elements, link tags and HTML comments.
 *
 * <p>Any exception raised on the way is printed and the text processed so far is
 * returned.
 *
 * @param content HTML text
 * @return the text without script/style/comment markup
 */
public static String RemoveStyleCode(String content) {
    try {
        // Script/style removal, matched case-insensitively, in this order.
        final String[] scriptAndStyle = {
            "(?s)<script\\s*.*?>(.*?)</script>",
            "(?s)<style\\s*.*?>(.*?)</style>",
            "(?s)<script\\s*.*?/>",
            "(?s)<style\\s*.*?/>",
        };
        for (String regex : scriptAndStyle) {
            content = Pattern.compile(regex, Pattern.CASE_INSENSITIVE).matcher(content).replaceAll("");
        }
        for (Pattern element : new Pattern[] {noscriptP, objectP, linkP}) {
            content = element.matcher(content).replaceAll("");
        }
        // Comments last.
        content = Pattern.compile("(?s)<!--.*?-->").matcher(content).replaceAll("");
    } catch (Exception e) {
        e.printStackTrace();
    }
    return content;
}
/**
 * Currently a no-op: the input is handed back unchanged. The CR/LF stripping this
 * method once performed is disabled.
 *
 * @param src any text, may be {@code null}
 * @return {@code src} itself
 */
public static String RemoveReturnCode(String src) {
    return src;
}
/**
 * Currently a no-op: the input is handed back unchanged. Both the removal of line
 * breaks between tags and the general CR/LF stripping are disabled.
 *
 * @param src any text, may be {@code null}
 * @return {@code src} itself
 */
public static String RemoveHTMLReturnCode(String src) {
    return src;
}
/**
 * Inserts CRLF line breaks around common block tags so the markup is split over
 * several lines. Tags are matched case-insensitively and rewritten in lowercase.
 *
 * <p>Any exception raised on the way is printed and the text processed so far is
 * returned.
 *
 * @param content HTML text
 * @return the text with line breaks inserted
 */
public static String AddHTMLLine(String content) {
    // {regex, replacement}; note that "<div" gets its break right after the tag name.
    final String[][] rules = {
        {"(?s)</div>", "</div>\r\n"},
        {"(?s)<div", "<div\r\n"},
        {"(?s)</p>", "</p>\r\n"},
        {"(?s)<p>", "<p>\r\n"},
        {"(?s)<br>", "<br>\r\n"},
        {"(?s)</li>", "</li>\r\n"},
    };
    try {
        for (String[] rule : rules) {
            Matcher tags = Pattern.compile(rule[0], Pattern.CASE_INSENSITIVE).matcher(content);
            content = tags.replaceAll(rule[1]);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return content;
}
/**
 * Renders a byte array as a lowercase hexadecimal string, two digits per byte.
 *
 * @param src the bytes to render
 * @return the hex string, or {@code null} when {@code src} is {@code null} or empty
 */
public static String bytesToHexString(byte[] src) {
    if (src == null || src.length == 0) {
        return null;
    }
    StringBuilder hex = new StringBuilder(src.length * 2);
    for (byte value : src) {
        int unsigned = value & 0xFF;
        // High nibble first, then low nibble; forDigit yields lowercase a-f.
        hex.append(Character.forDigit(unsigned >>> 4, 16));
        hex.append(Character.forDigit(unsigned & 0x0F, 16));
    }
    return hex.toString();
}
/**
 * Guesses the real type of a file from the first seven bytes of its stream.
 *
 * <p>The bytes are rendered as uppercase hex and searched for known signatures:
 * {@code 3C21444F} ("&lt;!DO"), {@code 3C68746D} ("&lt;htm") and {@code 3C48544D}
 * ("&lt;HTM") mean web; {@code D0CF11E0} and {@code 504B030414} ("PK" zip header) mean
 * word; {@code 255044462D312E} ("%PDF-1.") means pdf; {@code 3C3F786D} ("&lt;?xm")
 * yields {@code null}. Anything else is treated as web. A read failure is printed and
 * the still-zeroed buffer is classified.
 *
 * @param is stream positioned at the start of the file; not closed by this method
 * @return {@code "web"}, {@code "word"}, {@code "pdf"}, or {@code null} for XML
 */
public static String getTypeByStream(FileInputStream is) {
    byte[] header = new byte[7];
    try {
        is.read(header, 0, header.length);
    } catch (IOException e) {
        e.printStackTrace();
    }
    String signature = bytesToHexString(header).toUpperCase();
    boolean htmlMarker = signature.contains("3C21444F")
            || signature.contains("3C68746D")
            || signature.contains("3C48544D");
    if (!htmlMarker) {
        if (signature.contains("D0CF11E0")) {
            return "word";
        }
        if (signature.contains("255044462D312E")) {
            return "pdf";
        }
        if (signature.contains("504B030414")) {
            return "word";
        }
        if (signature.contains("3C3F786D")) {
            // XML is deliberately reported as "no type".
            return null;
        }
    }
    // HTML markers and every unrecognised signature both count as web content.
    return "web";
}
/**
 * Determines the type of a file from its content signature and, for Office containers,
 * from its extension.
 *
 * <p>The signature check is delegated to {@code getTypeByStream}. When that reports
 * {@code "word"} the extension refines the answer: {@code .doc/.docx} is word,
 * {@code .xls/.xlsx} is excel and {@code .ppt/.pptx} is ppt; any other or missing
 * extension keeps {@code "word"}.
 *
 * <p>Fixes compared with the previous version: the stream is always closed (it leaked
 * on the word/excel/ppt returns), a {@code null} signature result no longer causes a
 * NullPointerException, and a name without a dot no longer causes a
 * StringIndexOutOfBoundsException.
 *
 * @param file path of the file to inspect
 * @return the detected type, whatever {@code getTypeByStream} returned for non-Office
 *         content (including {@code null} for XML), or {@code "unknown"} when the file
 *         cannot be opened or closed
 */
public static String getFileType(String file) {
    try (FileInputStream is = new FileInputStream(file)) {
        String type = getTypeByStream(is);
        if (!"word".equals(type)) {
            return type;
        }
        // The "word" signatures are shared by doc, excel and ppt files.
        int dot = file.lastIndexOf('.');
        String extension = dot < 0 ? "" : file.substring(dot).toLowerCase();
        switch (extension) {
            case ".xls":
            case ".xlsx":
                return "excel";
            case ".ppt":
            case ".pptx":
                return "ppt";
            default:
                return "word";
        }
    } catch (IOException e) {
        // Covers FileNotFoundException as well.
        e.printStackTrace();
    }
    return "unknown";
}
/**
 * Returns the extension to use for a file.
 *
 * <p>The extension is currently fixed: every input yields {@code ".html"}. The logic
 * that derived the extension from the text after the last dot is disabled.
 *
 * @author 刘小鹏 (created 2015-6-4)
 * @param file file name or path; ignored
 * @return always {@code ".html"}
 * @version 1.0
 */
public static String getFileExt(String file) {
    final String fixedExtension = ".html";
    return fixedExtension;
}
/**
 * Returns the last path segment, i.e. the text after the final path separator.
 *
 * <p>Both {@code '\'} and {@code '/'} count as separators and the right-most one wins.
 * Previously a backslash anywhere in the path hid any later forward slashes, so
 * {@code "C:\dir/sub/f.txt"} produced {@code "dir/sub/f.txt"}.
 *
 * @param file a path, may be {@code null}
 * @return the file name; the whole input when it has no separator; {@code null} for
 *         {@code null}
 */
public static String getFileName(String file) {
    if (file == null) {
        return null;
    }
    int separator = Math.max(file.lastIndexOf('\\'), file.lastIndexOf('/'));
    return separator < 0 ? file : file.substring(separator + 1);
}
/**
 * Returns the directory part of a path, including the trailing separator.
 *
 * <p>Both {@code '\'} and {@code '/'} count as separators and the right-most one wins.
 * Previously a backslash anywhere in the path hid any later forward slashes, so
 * {@code "C:\dir/sub/f.txt"} produced {@code "C:\"}.
 *
 * @param file a path, may be {@code null}
 * @return the text up to and including the last separator; {@code ""} when there is no
 *         separator; {@code null} for {@code null}
 */
public static String getFilePath(String file) {
    if (file == null) {
        return null;
    }
    int separator = Math.max(file.lastIndexOf('\\'), file.lastIndexOf('/'));
    return separator < 0 ? "" : file.substring(0, separator + 1);
}
/**
 * Returns the file name without its extension.
 *
 * <p>The directory part is removed via {@code getFileName}; the title is the text
 * before the last dot. A name without a dot is returned whole, and a name that starts
 * with its only dot yields the empty string.
 *
 * @param file a path, may be {@code null}
 * @return the title, or {@code null} for {@code null}
 */
public static String getFileTitle(String file) {
    if (file == null) {
        return null;
    }
    String name = getFileName(file);
    int dot = name.lastIndexOf(".");
    // substring(0, 0) is "", so a leading dot needs no special case.
    return dot < 0 ? name : name.substring(0, dot);
}
/**
 * Heuristic check for a usable English sentence: at least 10 characters long, and the
 * characters left after deleting ASCII letters and spaces (then trimming) make up at
 * most 30% of the original length.
 *
 * @param sentence text to check; must not be {@code null}
 * @return {@code true} when the sentence passes both conditions
 */
public static boolean isGoodEngSentence(String sentence) {
    int total = sentence.length();
    if (total < 10) {
        return false;
    }
    String leftover = sentence.replaceAll("[a-zA-Z ]", "").trim();
    return !(leftover.length() * 1.0 / total > 0.3);
}
/**
 * Parameterised variant of the English sentence heuristic.
 *
 * <p>The sentence length must lie in {@code [minlen, maxlen]}. The characters left
 * after deleting ASCII letters and spaces (then trimming) must be at most 25% of the
 * original length and at most {@code maxOtherLen} in number.
 *
 * @param sentence    text to check; must not be {@code null}
 * @param minlen      minimum accepted length, inclusive
 * @param maxlen      maximum accepted length, inclusive
 * @param maxOtherLen maximum number of leftover characters
 * @return {@code true} when all conditions hold
 */
public static boolean isGoodEngSentenceX(String sentence, int minlen, int maxlen, int maxOtherLen) {
    int total = sentence.length();
    boolean lengthOk = !(total < minlen || total > maxlen);
    if (!lengthOk) {
        return false;
    }
    int leftover = sentence.replaceAll("[a-zA-Z ]", "").trim().length();
    boolean tooNoisy = (leftover * 1.0 / total > 0.25) || (leftover > maxOtherLen);
    return !tooNoisy;
}
/**
 * Heuristic check for a usable Chinese sentence: at least 5 characters long, and the
 * characters left after deleting CJK ideographs (U+4E00..U+9FA5) and spaces (then
 * trimming) make up at most 40% of the original length.
 *
 * @param sentence text to check; must not be {@code null}
 * @return {@code true} when the sentence passes both conditions
 */
public static boolean isGoodChiSentence(String sentence) {
    int total = sentence.length();
    if (total < 5) {
        return false;
    }
    String leftover = sentence.replaceAll("[\\u4e00-\\u9fa5 ]", "").trim();
    return !(leftover.length() * 1.0 / total > 0.4);
}
/**
 * Parameterised variant of the Chinese sentence heuristic.
 *
 * <p>The sentence length must lie in {@code [minlen, maxlen]}. The characters left
 * after deleting CJK ideographs (U+4E00..U+9FA5) and spaces (then trimming) must be at
 * most 30% of the original length and at most {@code maxOtherLen} in number.
 *
 * @param sentence    text to check; must not be {@code null}
 * @param minlen      minimum accepted length, inclusive
 * @param maxlen      maximum accepted length, inclusive
 * @param maxOtherLen maximum number of leftover characters
 * @return {@code true} when all conditions hold
 */
public static boolean isGoodChiSentenceX(String sentence, int minlen, int maxlen, int maxOtherLen) {
    int total = sentence.length();
    boolean lengthOk = !(total < minlen || total > maxlen);
    if (!lengthOk) {
        return false;
    }
    int leftover = sentence.replaceAll("[\\u4e00-\\u9fa5 ]", "").trim().length();
    boolean tooNoisy = (leftover * 1.0 / total > 0.3) || (leftover > maxOtherLen);
    return !tooNoisy;
}
/**
 * Language-neutral sentence heuristic: at least 10 characters long, and the characters
 * left after deleting ASCII letters, CJK ideographs (U+4E00..U+9FA5) and spaces (then
 * trimming) make up at most 40% of the original length.
 *
 * @param sentence text to check; must not be {@code null}
 * @return {@code true} when the sentence passes both conditions
 */
public static boolean isGoodSentence(String sentence) {
    int total = sentence.length();
    if (total < 10) {
        return false;
    }
    String leftover = sentence.replaceAll("[a-zA-Z\\u4e00-\\u9fa5 ]", "").trim();
    return !(leftover.length() * 1.0 / total > 0.4);
}
/**
 * Same noise-ratio test as {@code isGoodSentence} but without a minimum length: the
 * characters left after deleting ASCII letters, CJK ideographs (U+4E00..U+9FA5) and
 * spaces (then trimming) must be at most 40% of the original length.
 *
 * <p>For the empty string the ratio is 0/0 (NaN), which is not greater than 0.4, so
 * the empty string is accepted.
 *
 * @param sentence text to check; must not be {@code null}
 * @return {@code true} when the noise ratio does not exceed 0.4
 */
public static boolean isGoodSentence_simple(String sentence) {
    String leftover = sentence.replaceAll("[a-zA-Z\\u4e00-\\u9fa5 ]", "").trim();
    double noiseRatio = leftover.length() * 1.0 / sentence.length();
    // Written as a negation so that NaN still counts as acceptable.
    return !(noiseRatio > 0.4);
}
/**
 * Collects the absolute paths of the files found at {@code directory}.
 *
 * <p>When {@code directory} is a regular file its path (as given) is added. When it is
 * a directory, either its direct child files are added ({@code bIncludeSubDir ==
 * false}) or the whole tree below it is walked ({@code bIncludeSubDir == true}).
 * A directory whose content cannot be listed ({@code File.list()} returns
 * {@code null}) is now skipped instead of causing a NullPointerException.
 *
 * @param l              list to append to; a new list is created when {@code null}
 * @param directory      file or directory path
 * @param bIncludeSubDir whether sub-directories are walked recursively
 * @return the list that received the paths
 */
public static List<String> getFiles(List<String> l, String directory, boolean bIncludeSubDir) {
    if (l == null) {
        l = new ArrayList<String>();
    }
    File root = new File(directory);
    if (root.isDirectory()) {
        String[] children = root.list();
        if (children == null) {
            // I/O error or missing permission: nothing to collect here.
            return l;
        }
        for (String child : children) {
            File childFile = new File(root, child);
            if (bIncludeSubDir) {
                // The recursive call handles both files and nested directories.
                getFiles(l, childFile.getAbsolutePath(), bIncludeSubDir);
            } else if (childFile.isFile()) {
                l.add(childFile.getAbsolutePath());
            }
        }
    } else if (root.isFile()) {
        l.add(directory);
    }
    return l;
}
/**
 * Walks {@code directory} recursively and writes the path of every regular file to
 * {@code bw}, one path per line.
 *
 * <p>A failure at this level is printed and reported as {@code false}. The result of
 * the recursive calls is not inspected, so a failure further down does not change the
 * value returned here.
 *
 * @param directory file or directory path
 * @param bw        writer receiving the paths; neither flushed nor closed here
 * @return {@code true} when this level completed without an exception
 */
public static boolean getFiles(String directory, BufferedWriter bw) {
    try {
        File entry = new File(directory);
        if (entry.isDirectory()) {
            for (String child : entry.list()) {
                getFiles(new File(entry, child).getAbsolutePath(), bw);
            }
        } else if (entry.isFile()) {
            bw.write(directory);
            bw.newLine();
        }
        return true;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return false;
}
/**
 * Copies the content of {@code inFile} to {@code outFile}, replacing any existing
 * content of the destination.
 *
 * <p>Both streams are now closed on every path; previously the input stream was never
 * closed and the output channel leaked when the copy failed. The source is opened
 * before the destination, so a missing source no longer truncates an existing
 * destination file.
 *
 * @param inFile  path of the file to read
 * @param outFile path of the file to write
 * @return {@code true} on success, {@code false} when any exception occurred (the
 *         exception is printed)
 */
public static boolean copyFile(String inFile, String outFile) {
    try (FileInputStream input = new FileInputStream(inFile);
         FileOutputStream output = new FileOutputStream(outFile);
         FileChannel source = input.getChannel();
         FileChannel target = output.getChannel()) {
        long size = source.size();
        long position = 0;
        // transferTo may move fewer bytes than requested, so loop until done.
        while (position < size) {
            long moved = source.transferTo(position, size - position, target);
            if (moved <= 0) {
                // No progress (e.g. the source shrank meanwhile): stop, do not spin.
                break;
            }
            position += moved;
        }
        return true;
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
}
/**
 * Tells whether the text contains at least one match of {@code patChi}
 * (a CJK ideograph).
 *
 * @param text text to scan; must not be {@code null}
 * @return {@code true} when a match is found
 */
public static boolean isContainedChiWord(String text) {
    return patChi.matcher(text).find();
}
/**
 * Tells whether the text contains at least one match of {@code patWord}
 * (an ASCII letter or a CJK ideograph).
 *
 * @param text text to scan; must not be {@code null}
 * @return {@code true} when a match is found
 */
public static boolean isContainedWord(String text) {
    return patWord.matcher(text).find();
}
/**
 * Tells whether the text consists solely of word characters, i.e. contains no match
 * of {@code patUnWord} (anything other than an ASCII letter or CJK ideograph). The
 * empty string therefore qualifies.
 *
 * @param text text to scan; must not be {@code null}
 * @return {@code true} when no non-word character is found
 */
public static boolean isContainedOnlyWord(String text) {
    return !patUnWord.matcher(text).find();
}
/**
 * Maps a word to its stem using the lookup table in {@code data/model_eng.txt}.
 *
 * <p>The table is loaded on first use. Each line is split at its first run of
 * whitespace into "word" and "stem". If loading fails, a message is printed, the
 * table stays unloaded (so the next call retries) and the trimmed word is returned.
 *
 * <p>Fixes compared with the previous version: the reader is closed on every path
 * (it leaked when reading failed), and the table is published in {@code stemMap} only
 * after it has been read completely, so a partially filled map is never visible.
 *
 * @param word the word to stem; must not be {@code null}
 * @return the stem from the table, or the trimmed word when it has no entry
 */
public static String stemming(String word) {
    if (stemMap == null) {
        HashMap<String, String> loaded = new HashMap<String, String>();
        // Group 1: the word, group 2: everything after the first whitespace run.
        Pattern entry = Pattern.compile("(.+?)[\\s]+(.+)");
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream("data/model_eng.txt"), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher fields = entry.matcher(line);
                if (fields.find()) {
                    loaded.put(fields.group(1), fields.group(2));
                }
            }
        } catch (Exception e) {
            System.out.println("Initialize stemming failed!");
            stemMap = null;
            return word.trim();
        }
        stemMap = loaded;
    }
    String key = word.trim();
    if (stemMap.containsKey(key)) {
        return stemMap.get(key);
    }
    return key;
}
/**
 * Stems every space-separated word of a text via {@code stemming}.
 *
 * @param text the text to process; must not be {@code null}
 * @return the stemmed words, each followed by a single space (so the result ends with
 *         a space)
 */
public static String stemmingText(String text) {
    StringBuilder stemmed = new StringBuilder();
    for (String token : text.split(" ")) {
        stemmed.append(stemming(token)).append(" ");
    }
    return stemmed.toString();
}
/**
 * Returns a copy of the map whose iteration order follows the values in descending
 * order. Entries with a {@code null} value come last; entries with equal values keep
 * the relative order they had in the source map's entry set.
 *
 * @param map the map to sort; must not be {@code null}; not modified
 * @return a new {@link LinkedHashMap} holding the same entries in sorted order
 */
public static <K, V extends Comparable<V>> Map<K, V> sortByValueDesc(Map<K, V> map) {
    List<Entry<K, V>> entries = new ArrayList<Entry<K, V>>(map.entrySet());
    entries.sort((left, right) -> {
        V a = left.getValue();
        V b = right.getValue();
        if (a == null || b == null) {
            if (a == b) {
                return 0;
            }
            // The entry holding the null value sorts after the other one.
            return b == null ? -1 : 1;
        }
        // Reversed operands give descending order.
        return b.compareTo(a);
    });
    Map<K, V> ordered = new LinkedHashMap<K, V>();
    for (Entry<K, V> entry : entries) {
        ordered.put(entry.getKey(), entry.getValue());
    }
    return ordered;
}
/**
 * Extracts plain text from an HTML string, normalising whitespace and line breaks.
 *
 * <p>The markup is first reduced by the chain RemoveHTMLReturnCode, RemoveStyleCode,
 * RemoveHTMLCode and HTMLDecode (in that order); the result is then cleaned by the
 * ordered regex replacements below. The order of those replacements matters.
 *
 * @param htmlText the HTML source
 * @return the extracted text
 */
public static String TransferHTML2Text(String htmlText) {
String text = Utility.HTMLDecode(Utility.RemoveHTMLCode(Utility.RemoveStyleCode(Utility.RemoveHTMLReturnCode(htmlText))));
// A run of three whitespace characters becomes a line break.
// NOTE(review): the exact whitespace characters inside this literal (ASCII space vs.
// another space character) cannot be told from the rendered source - verify.
text = text.replaceAll("   ", "\n");
// Normalise CRLF to LF, dropping spaces that precede the line break.
text = text.replaceAll(" +\r\n", "\n");
text = text.replaceAll("\r\n", "\n");
// Collapse runs of spaces to one.
text = text.replaceAll(" +", " ");
// Remove no-break spaces (U+00A0) and ideographic spaces (U+3000).
text = text.replaceAll("[\\u00A0\\u3000]", "");
// NOTE(review): same caveat as above about which space character this literal holds.
text = text.replaceAll(" ", "");
text = text.replaceAll(" \n", "\n");
// Every '=' character is removed.
text = text.replaceAll("=", "");
// Five halving passes: collapses runs of up to 32 line breaks into one.
text = text.replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n");
return text;
}
/**
 * Escapes the XML special characters of a string and strips invisible characters,
 * while keeping line breaks.
 *
 * <p>{@code &}, {@code <}, {@code >}, {@code "} and {@code '} are replaced by their
 * entities (ampersand first), then the result is passed through {@code filterUnicode}
 * and {@code filterASCIINotEnter}.
 *
 * @param str the text to clean, may be {@code null}
 * @return the cleaned text, or {@code ""} when {@code str} is {@code null} or empty
 */
public static String getValueAfterReplaceSpecialWordNotEnter(String str) {
    if (org.apache.commons.lang3.StringUtils.isEmpty(str)) {
        return "";
    }
    // '&' must be escaped first so the entities added below stay intact.
    String escaped = str.replace("&", "&amp;");
    escaped = escaped.replace("<", "&lt;");
    escaped = escaped.replace(">", "&gt;");
    escaped = escaped.replace("\"", "&quot;");
    escaped = escaped.replace("'", "&apos;");
    String withoutInvisibleUnicode = filterUnicode(escaped);
    return filterASCIINotEnter(withoutInvisibleUnicode);
}
/**
 * Replaces invisible ASCII control characters with a space, leaving line breaks alone.
 *
 * <p>Characters below 0x20 and the DEL character 0x7F are replaced, except line feed
 * (0x0A) and carriage return (0x0D). A blank input yields the empty string.
 *
 * @param source the text to filter, may be {@code null}
 * @return the filtered text, or {@code ""} when {@code source} is blank
 */
private static String filterASCIINotEnter(String source) {
    if (org.apache.commons.lang3.StringUtils.isBlank(source)) {
        return "";
    }
    StringBuilder filtered = new StringBuilder(source.length());
    for (int i = 0; i < source.length(); i++) {
        char current = source.charAt(i);
        boolean lineBreak = current == 0x0A || current == 0x0D;
        boolean control = current < 0x20 || current == 0x7F;
        filtered.append(control && !lineBreak ? (char) 0x20 : current);
    }
    return filtered.toString();
}
/**
 * Removes a fixed list of Unicode characters and ranges from the text, among them
 * U+007F..U+009F, the soft hyphen U+00AD, U+200B..U+200F, U+2028..U+202E and the
 * byte-order mark U+FEFF.
 *
 * @param source the text to filter; must not be {@code null}
 * @return the text without the listed characters
 */
private static String filterUnicode(String source) {
    // replaceAll hands back the unchanged text when nothing matches, so no
    // separate find() check is needed.
    Pattern unwanted = Pattern.compile("([\\u007f-\\u009f]|\\u00ad|[\\u0483-\\u0489]|[\\u0559-\\u055a]|\\u058a|[\\u0591-\\u05bd]|\\u05bf|[\\u05c1-\\u05c2]|[\\u05c4-\\u05c7]|[\\u0606-\\u060a]|[\\u063b-\\u063f]|\\u0674|[\\u06e5-\\u06e6]|\\u070f|[\\u076e-\\u077f]|\\u0a51|\\u0a75|\\u0b44|[\\u0b62-\\u0b63]|[\\u0c62-\\u0c63]|[\\u0ce2-\\u0ce3]|[\\u0d62-\\u0d63]|\\u135f|[\\u200b-\\u200f]|[\\u2028-\\u202e]|\\u2044|\\u2071|[\\uf701-\\uf70e]|[\\uf710-\\uf71a]|\\ufb1e|[\\ufc5e-\\ufc62]|\\ufeff|\\ufffc)");
    return unwanted.matcher(source).replaceAll("");
}
/**
 * Converts an HTML string into simplified markup: text paragraphs wrapped in indented
 * {@code <p>} tags, with images, links and table structure preserved.
 *
 * <p>The markup is first reduced by the chain RemoveHTMLReturnCode, RemoveStyleCode,
 * RemoveHTMLCodeWithImg and HTMLDecode (in that order); whitespace and line breaks are
 * then normalised and every resulting line is wrapped as a paragraph.
 *
 * @param htmlText the HTML source
 * @return the simplified markup
 */
public static String TransferHTML2TextWithImg(String htmlText) {
String text = Utility.HTMLDecode(Utility.RemoveHTMLCodeWithImg(Utility.RemoveStyleCode(Utility.RemoveHTMLReturnCode(htmlText))));
// A run of three whitespace characters becomes a line break.
// NOTE(review): the exact whitespace characters inside this literal (ASCII space vs.
// another space character) cannot be told from the rendered source - verify.
text = text.replaceAll("   ", "\n");
// Normalise CRLF to LF, dropping spaces that precede the line break.
text = text.replaceAll(" +\r\n", "\n");
text = text.replaceAll("\r\n", "\n");
// Collapse runs of spaces to one.
text = text.replaceAll(" +", " ");
// text = text.replaceAll("[\\u00A0\\u3000]", "");
// text = text.replaceAll(" ", "");
text = text.replaceAll(" \n", "\n");
// Six halving passes: collapses runs of up to 64 line breaks into one.
text = text.replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n");
// When the text contains a table, lines carrying table markup are left untouched.
if (text.contains("<table")) {
String[] textArr = text.split("\n");
String result ="";
for (String tex : textArr) {
// Only lines without any table/tr/td fragment are wrapped as paragraphs.
if (!tex.contains("<table") && !tex.contains("<td") && !tex.contains("<tr")
&& !tex.contains("table>") && !tex.contains("td>") && !tex.contains("tr>")) {
result += "<p style='text-indent:2em;'>" + tex + "</p><br/>";
} else {
result += tex;
}
}
text = result;
} else {
// No table: every line break closes one paragraph and opens the next;
// whitespace directly after a line break is swallowed by the first rule.
text = text.replaceAll("\n\\s+", "</p>"+"<br/>" + "<p style='text-indent:2em;'>");
text = text.replaceAll("\n", "</p>"+"<br/>" + "<p style='text-indent:2em;'>");
text = "<p style='text-indent:2em;'>" + text + "</p>";
}
return text;
}
/**
 * Replaces a fixed set of HTML entities by plain characters, maps the katakana middle
 * dot to the Latin middle dot and turns every {@code '|'} into a space.
 *
 * <p>The replacements run in a fixed order. {@code &amp;} is decoded before
 * {@code &lt;} and {@code &gt;}, so {@code &amp;lt;} ends up as {@code <}.
 * {@code &hellip;} becomes three ASCII dots; the later rule mapping it to the ellipsis
 * character never finds anything left to replace.
 *
 * @param s the text to normalise; must not be {@code null}
 * @return the normalised text
 */
public static String normalizeHtmlTransf(String s) {
    // First group: regex-based replacements.
    final String[][] regexRules = {
        {"&bull;", "·"},
        {"&middot;", "·"},
        {"&nbsp;", " "},
        {"&quot;", "\""},
        {"&amp;", "&"},
    };
    // Second group: literal replacements, applied after the middle-dot mapping.
    final String[][] literalRules = {
        {"&ldquo;", "\""},
        {"&rdquo;", "\""},
        {"&hellip;", "..."},
        {"&lt;", "<"},
        {"&gt;", ">"},
        {"&mdash;", "—"},
        {"&ndash;", "–"},
        {"&tilde;", "~"},
        {"&lsquo;", "'"},
        {"&rsquo;", "'"},
        {"&sbquo;", ","},
        {"&lsaquo;", "‹"},
        {"&rsaquo;", "›"},
        {"&hellip;", "…"},
        {"|", " "},
    };
    String normalized = s;
    for (String[] rule : regexRules) {
        normalized = normalized.replaceAll(rule[0], rule[1]);
    }
    normalized = normalized.replace('・', '·');
    for (String[] rule : literalRules) {
        normalized = normalized.replace(rule[0], rule[1]);
    }
    return normalized;
}
/**
 * Extracts text from an HTML string while keeping images, links and table markup,
 * without wrapping the lines in paragraph tags.
 *
 * <p>The markup is first reduced by the chain RemoveHTMLReturnCode, RemoveStyleCode,
 * RemoveHTMLCodeWithImg and HTMLDecode (in that order); the result is then cleaned by
 * the ordered regex replacements below. The order of those replacements matters.
 *
 * @param htmlText the HTML source
 * @return the reduced text
 */
public static String TransferHTML3TextWithImg(String htmlText) {
String text = Utility.HTMLDecode(Utility.RemoveHTMLCodeWithImg(Utility.RemoveStyleCode(Utility.RemoveHTMLReturnCode(htmlText))));
// A run of three whitespace characters becomes a line break.
// NOTE(review): the exact whitespace characters inside this literal (ASCII space vs.
// another space character) cannot be told from the rendered source - verify.
text = text.replaceAll("   ", "\n");
// Normalise CRLF to LF, dropping spaces that precede the line break.
text = text.replaceAll(" +\r\n", "\n");
text = text.replaceAll("\r\n", "\n");
// Collapse runs of spaces to one.
text = text.replaceAll(" +", " ");
// Remove no-break spaces (U+00A0) and ideographic spaces (U+3000).
text = text.replaceAll("[\\u00A0\\u3000]", "");
// NOTE(review): same caveat as above about which space character this literal holds;
// if it is an ASCII space this also strips the spaces inside retained <img>/<a> tags.
text = text.replaceAll(" ", "");
text = text.replaceAll(" \n", "\n");
// Six halving passes: collapses runs of up to 64 line breaks into one.
text = text.replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n").replaceAll("\n\n", "\n");
return text;
}
/**
 * Removes markup that carries no article content: scripts, styles, hidden divs, form
 * controls, comments, legends, iframes, noscript/object elements, images, center tags
 * and cufon tags. Attributes are stripped from all remaining tags, anchors are turned
 * into {@code <span>} elements and empty ul/div/p/li/canvas elements are dropped.
 *
 * <p>Fix: anchors used to be rewritten to the misspelled tag {@code <sapn>}, which is
 * not an HTML element; they are now rewritten to {@code <span>}.
 *
 * <p>Any exception raised on the way is printed and the text processed so far is
 * returned.
 *
 * @param htmlText the HTML source
 * @return the cleaned markup
 */
public static String RemoveUselessHTMLTag(String htmlText) {
    try {
        htmlText = Utility.RemoveStyleCode(htmlText);
        htmlText = htmlText.replaceAll("&nbsp;", " ");
        // Hidden blocks and form controls go first, while attributes are still present
        // (divNoneP needs the display:none style attribute).
        final Pattern[] beforeAttributeStrip = {
            divNoneP, textareaRemoveP, selectRemoveP, optionRemoveP, labelRemoveP,
            inputP, formP, buttonP, formRP,
        };
        for (Pattern unwanted : beforeAttributeStrip) {
            htmlText = unwanted.matcher(htmlText).replaceAll("");
        }
        // Reduce every opening tag to its bare name.
        htmlText = removeAttrP.matcher(htmlText).replaceAll("<$1>");
        htmlText = commentP.matcher(htmlText).replaceAll("");
        htmlText = legendRemoveP.matcher(htmlText).replaceAll("");
        // Links keep their text but lose their link nature.
        // NOTE(review): aP also matches other tags whose name starts with "a".
        htmlText = aP.matcher(htmlText).replaceAll("<span>");
        htmlText = aRP.matcher(htmlText).replaceAll("</span>");
        final Pattern[] afterAttributeStrip = {
            iframeP, noscriptP, objectP, imgP, imgRP, centerP, centerRP,
        };
        for (Pattern unwanted : afterAttributeStrip) {
            htmlText = unwanted.matcher(htmlText).replaceAll("");
        }
        final String[] literalTags = {
            "<cufontext>", "</cufontext>", "<cufon>", "</cufon>",
        };
        for (String tag : literalTags) {
            htmlText = htmlText.replaceAll(tag, "");
        }
        // Elements that ended up empty (whitespace only) are removed last.
        final String[] emptyElements = {
            "(?s)<ul[^>]*>\\s*</ul>",
            "(?s)<div[^>]*>\\s*</div>",
            "(?s)<p[^>]*>\\s*</p>",
            "(?s)<li[^>]*>\\s*</li>",
            "(?s)<canvas[^>]*>\\s*</canvas>",
        };
        for (String regex : emptyElements) {
            htmlText = htmlText.replaceAll(regex, "");
        }
        return htmlText;
    } catch (Exception e) {
        e.printStackTrace();
        return htmlText;
    }
}
/**
 * Removes useless HTML tags (including {@code <a>} and similar) from the given markup.
 *
 * <p>The clean-up is a fixed, ordered sequence of regular-expression replacements. The patterns
 * themselves are declared elsewhere in this class. Unlike the sibling clean-up method, the
 * {@code imgP}/{@code imgRP} removals are not applied here; instead {@code imgReplaceP} and
 * {@code imgRevReplaceP} are applied on either side of {@code removeAttrP}
 * (presumably to shield image tags from attribute stripping - confirm against the pattern definitions).
 *
 * @param htmlText the HTML to clean
 * @return the cleaned HTML; if any step throws, the stack trace is printed and the text as it
 *         stood before the failing step is returned
 */
public static String RemoveUselessHTMLTagX(String htmlText) {
    try {
        htmlText = Utility.RemoveStyleCode(htmlText);
        htmlText = htmlText.replaceAll("&nbsp;", " ");

        // First wave of removals, applied strictly in this order.
        Pattern[] leadingRemovals = {
                divNoneP, textareaRemoveP, selectRemoveP, optionRemoveP, labelRemoveP,
                inputP, formP, buttonP, formRP
        };
        for (Pattern removal : leadingRemovals) {
            htmlText = removal.matcher(htmlText).replaceAll("");
        }

        // Rename img tags, strip attributes, then rename the img tags back.
        htmlText = imgReplaceP.matcher(htmlText).replaceAll("<_img$1>");
        htmlText = removeAttrP.matcher(htmlText).replaceAll("<$1>");
        htmlText = imgRevReplaceP.matcher(htmlText).replaceAll("<img$1>");

        Pattern[] commentAndLegend = {commentP, legendRemoveP};
        for (Pattern removal : commentAndLegend) {
            htmlText = removal.matcher(htmlText).replaceAll("");
        }

        // NOTE(review): "sapn" looks like a misspelling of "span"; it is kept byte-for-byte because
        // downstream code may depend on the emitted tag name - confirm before changing.
        htmlText = aP.matcher(htmlText).replaceAll("<sapn>");
        htmlText = aRP.matcher(htmlText).replaceAll("</sapn>");

        Pattern[] trailingRemovals = {iframeP, noscriptP, objectP, centerP, centerRP};
        for (Pattern removal : trailingRemovals) {
            htmlText = removal.matcher(htmlText).replaceAll("");
        }

        // Literal cufon tags first, then elements that ended up with whitespace-only content.
        String[] regexesToDrop = {
                "<cufontext>",
                "</cufontext>",
                "<cufon>",
                "</cufon>",
                "(?s)<ul[^>]*>\\s*</ul>",
                "(?s)<div[^>]*>\\s*</div>",
                "(?s)<p[^>]*>\\s*</p>",
                "(?s)<li[^>]*>\\s*</li>",
                "(?s)<canvas[^>]*>\\s*</canvas>"
        };
        for (String regex : regexesToDrop) {
            htmlText = htmlText.replaceAll(regex, "");
        }
        return htmlText;
    } catch (Exception e) {
        e.printStackTrace();
        return htmlText;
    }
}
/**
 * Removes hyperlinks that carry no surrounding content, and prunes ancestors left empty.
 *
 * <p>For every {@code <a>} element in the parsed document:
 * <ul>
 *   <li>if the link text equals its parent's whole text (spaces ignored), the link is removed;</li>
 *   <li>otherwise the parent's HTML is passed through {@code Utility.aRemoveP} and
 *       {@code Utility.TransferHTML2Text}; if {@code Utility.patWordAndNum} finds nothing in the
 *       result, the whole parent is removed;</li>
 *   <li>finally, ancestors whose text is empty are removed, walking upwards.</li>
 * </ul>
 *
 * <p>The upward walk previously ended only when {@code parentElement} became {@code null} and the
 * resulting {@link NullPointerException} was swallowed; the loop now tests for {@code null}
 * explicitly. The resulting document is unchanged.
 *
 * @param contentWithTag HTML content to process
 * @return {@code contentWithTag} untouched when it contains no links; otherwise the outer HTML of
 *         the whole parsed document
 */
public static String RemoveUselessLink(String contentWithTag) {
    Document doc = Jsoup.parse(contentWithTag);
    Elements contentElems = doc.select("a");
    if ((contentElems == null) || contentElems.isEmpty()) {
        return contentWithTag;
    }
    for (Element aElement : contentElems) {
        try {
            Element parentElement = aElement.parent();
            if (parentElement == null) {
                // The link was already detached by an earlier removal; nothing left to inspect.
                continue;
            }
            String elementText = aElement.text().trim().replaceAll(" ", "").trim();
            String parentText = parentElement.text().trim().replaceAll(" ", "").trim();
            if (parentText.equals(elementText)) {
                aElement.remove();
            } else {
                String parentWithoutLinks = Utility.aRemoveP.matcher(parentElement.html()).replaceAll("");
                String parentPlainText = Utility.TransferHTML2Text(parentWithoutLinks);
                if (!Utility.patWordAndNum.matcher(parentPlainText).find()) {
                    parentElement.remove();
                }
            }
            // Prune ancestors left without text; stop at the top of the tree.
            while (parentElement != null && parentElement.text().trim().isEmpty()) {
                Element emptiedElement = parentElement;
                parentElement = parentElement.parent();
                emptiedElement.remove();
            }
        } catch (Exception e) {
            // Best effort: a failure on one link must not abort processing of the others.
            continue;
        }
    }
    return doc.outerHtml();
}
/**
 * Backup variant of the date check: reports whether any of the date patterns
 * {@code patDate0} .. {@code patDate8} matches somewhere in the content, without validating the
 * matched text.
 *
 * @param content text to scan
 * @return {@code true} if at least one pattern finds a match; {@code false} otherwise or when an
 *         exception occurs (the stack trace is printed)
 */
public static boolean ContainDateInfo_BAK(String content) {
    try {
        Pattern[] datePatterns = {
                patDate0, patDate1, patDate2, patDate3, patDate4, patDate5, patDate6, patDate7, patDate8
        };
        for (Pattern datePattern : datePatterns) {
            if (datePattern.matcher(content).find()) {
                return true;
            }
        }
        return false;
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
}
/**
 * Reports whether the content contains a date that can actually be parsed.
 *
 * <p>The patterns {@code patDate0} .. {@code patDate8} are tried in order. A pattern counts only
 * when its first match is accepted by {@code Utility.transDate(String, int)} for the same index.
 *
 * @param content text to scan
 * @return {@code true} if some pattern's first match converts to a non-null date; {@code false}
 *         otherwise or when an exception occurs (the stack trace is printed)
 */
public static boolean ContainDateInfo(String content) {
    try {
        Pattern[] datePatterns = {
                patDate0, patDate1, patDate2, patDate3, patDate4, patDate5, patDate6, patDate7, patDate8
        };
        for (int type = 0; type < datePatterns.length; type++) {
            Matcher candidate = datePatterns[type].matcher(content);
            if (candidate.find() && Utility.transDate(candidate.group(), type) != null) {
                return true;
            }
        }
        return false;
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
}
/**
 * Backup variant of the date lookup: returns the matcher of the first date pattern
 * ({@code patDate0} .. {@code patDate8}, in that order) that finds a match, without validating
 * the matched text.
 *
 * @param content text to scan
 * @return the matcher positioned on its first match, or {@code null} when no pattern matches or
 *         an exception occurs (the stack trace is printed)
 */
public static Matcher ContainedDateInfo_BAK(String content) {
    try {
        Pattern[] datePatterns = {
                patDate0, patDate1, patDate2, patDate3, patDate4, patDate5, patDate6, patDate7, patDate8
        };
        for (Pattern datePattern : datePatterns) {
            Matcher candidate = datePattern.matcher(content);
            if (candidate.find()) {
                return candidate;
            }
        }
        return null;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Returns the matcher of the first date pattern whose match is a parseable date.
 *
 * <p>The patterns {@code patDate0} .. {@code patDate8} are tried in order. A pattern is accepted
 * only when its first match converts to a non-null date through
 * {@code Utility.transDate(String, int)} with the same index.
 *
 * @param content text to scan
 * @return the accepted matcher, positioned on its first match, or {@code null} when none
 *         qualifies or an exception occurs (the stack trace is printed)
 */
public static Matcher ContainedDateInfo(String content) {
    try {
        Pattern[] datePatterns = {
                patDate0, patDate1, patDate2, patDate3, patDate4, patDate5, patDate6, patDate7, patDate8
        };
        for (int type = 0; type < datePatterns.length; type++) {
            Matcher candidate = datePatterns[type].matcher(content);
            if (candidate.find() && Utility.transDate(candidate.group(), type) != null) {
                return candidate;
            }
        }
        return null;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Parses a matched date string with the formatter(s) that belong to the given pattern index.
 *
 * <p>The {@code formatterN} fields and {@code thresholdDate} are declared elsewhere in this class
 * (presumably shared {@code java.text.DateFormat} instances - confirm; if so they are not
 * thread-safe). {@code thresholdDate} is lazily set to 1970-01-01, and any result before it is
 * rejected.
 *
 * <p>Type 5 has four candidate formats. Previously a parse exception from the second or third
 * format jumped straight to the outer handler and returned {@code null}, so the remaining formats
 * were never tried. Each fallback is now attempted in turn.
 *
 * @param source the date text to parse
 * @param type   index of the date pattern that produced {@code source} (0-8); type 1 parsing is
 *               currently disabled and always yields {@code null}
 * @return the parsed date, or {@code null} if parsing fails, the type is not handled, or the date
 *         lies before the threshold
 */
public static Date transDate(String source, int type) {
    try {
        if (thresholdDate == null) {
            thresholdDate = formatter0.parse("1970-01-01");
        }
        Date date = null;
        switch (type) {
            case 0:
                date = formatter0.parse(source);
                if (date.before(thresholdDate)) {
                    // Retry with the alternative layout before giving up.
                    date = formatter0_1.parse(source);
                    if (date.before(thresholdDate)) {
                        return null;
                    }
                }
                break;
            case 1:
                // Parsing with formatter1 is disabled; matches of pattern 1 are never accepted.
                break;
            case 2:
                date = formatter2.parse(source);
                break;
            case 3:
                try {
                    date = formatter3_1.parse(source);
                } catch (Exception e) {
                    date = null;
                }
                if (date == null) {
                    date = formatter3_2.parse(source);
                }
                break;
            case 4:
                date = formatter4.parse(source);
                break;
            case 5:
                try {
                    date = formatter5_1.parse(source);
                } catch (Exception e) {
                    date = null;
                }
                if ((date == null) || (date.before(thresholdDate))) {
                    try {
                        date = formatter5_2.parse(source);
                    } catch (Exception e) {
                        // Fall through to the next candidate format.
                        date = null;
                    }
                }
                if ((date == null) || (date.before(thresholdDate))) {
                    try {
                        date = formatter5_3.parse(source);
                    } catch (Exception e) {
                        // Fall through to the last candidate format.
                        date = null;
                    }
                }
                if ((date == null) || (date.before(thresholdDate))) {
                    // Last candidate: a failure here is handled by the outer catch (returns null).
                    date = formatter5_4.parse(source);
                }
                break;
            case 6:
                date = formatter6.parse(source);
                break;
            case 7:
                date = formatter7.parse(source);
                break;
            case 8:
                date = formatter8.parse(source);
                break;
            default:
                break;
        }
        if ((date != null) && (date.before(thresholdDate))) {
            return null;
        }
        return date;
    } catch (Exception e) {
        return null;
    }
}
/**
 * Extracts the first parseable date from free text.
 *
 * <p>The patterns {@code patDate0} .. {@code patDate8} are tried in order. For each pattern that
 * finds a match, the matched text is converted with {@code Utility.transDate(String, int)} using
 * the pattern's index, and the first non-null result wins.
 *
 * @param content text to scan
 * @return the first successfully parsed date, or {@code null} when none is found or an exception
 *         occurs (the stack trace is printed)
 */
public static Date transDate(String content) {
    try {
        Pattern[] datePatterns = {
                patDate0, patDate1, patDate2, patDate3, patDate4, patDate5, patDate6, patDate7, patDate8
        };
        for (int type = 0; type < datePatterns.length; type++) {
            Matcher candidate = datePatterns[type].matcher(content);
            if (!candidate.find()) {
                continue;
            }
            Date parsed = Utility.transDate(candidate.group(), type);
            if (parsed != null) {
                return parsed;
            }
        }
        return null;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Finds a date in the content via {@code transDate(String)} and renders it with
 * {@code formatter0}.
 *
 * @param content text to scan for a date
 * @return the formatted date, or {@code null} when no date is found or any step throws
 */
public static String transStandardDate(String content) {
    try {
        Date parsed = transDate(content);
        return (parsed == null) ? null : formatter0.format(parsed);
    } catch (Exception e) {
        return null;
    }
}
/** Matches an {@code <img>} tag and captures the quoted {@code src} value in group 3. */
private static final Pattern CONTENT_IMG_SRC_PATTERN =
        Pattern.compile("(<img.+?src=)(\"|')(.+?)(\"|')(.*?/?>)", Pattern.CASE_INSENSITIVE);

/**
 * Collects the image paths referenced by {@code <img src="...">} tags in the body text.
 * Author: Liu Xiaopeng; created 2015-11-13 17:27:27.
 *
 * <p>The loop body used to be empty, so the method always returned an empty list. It now returns
 * one entry per image tag, in document order. Paths that already start with {@code http://} or
 * {@code https://} are returned unchanged. Any other path is resolved against {@code uri} when
 * that is a usable base URL, and is returned as written otherwise.
 *
 * @param text HTML body text; {@code null} or empty yields an empty list
 * @param uri  address of the page the text came from, used as the base for relative paths; may be
 *             {@code null}
 * @return a mutable list of image paths, never {@code null}
 * @version 1.0
 */
public static List<String> getContentImgPath(String text, String uri) {
    List<String> result = new ArrayList<String>();
    if (text == null || text.isEmpty()) {
        return result;
    }
    Matcher m = CONTENT_IMG_SRC_PATTERN.matcher(text);
    while (m.find()) {
        String rawPath = m.group(3).trim();
        if (rawPath.isEmpty()) {
            continue;
        }
        result.add(resolveContentImgPath(rawPath, uri));
    }
    return result;
}

/** Returns rawPath unchanged when absolute; otherwise resolves it against uri if possible. */
private static String resolveContentImgPath(String rawPath, String uri) {
    boolean absolute = rawPath.regionMatches(true, 0, "http://", 0, 7)
            || rawPath.regionMatches(true, 0, "https://", 0, 8);
    if (absolute || uri == null || uri.trim().isEmpty()) {
        return rawPath;
    }
    try {
        return new java.net.URL(new java.net.URL(uri.trim()), rawPath).toString();
    } catch (java.net.MalformedURLException e) {
        // Unusable base address or unsupported scheme (e.g. data:); keep the path as written.
        return rawPath;
    }
}
/**
 * Round-trips the content through the source charset and then the target charset.
 * Author: Liu Xiaopeng; created 2015-10-28 09:35:01.
 *
 * <p>The text is encoded and decoded with {@code sourceCharset}, then encoded and decoded with
 * {@code targetCharset}; the final decoded string is returned.
 *
 * @param content       text to convert
 * @param sourceCharset name of the charset used for the first round trip
 * @param targetCharset name of the charset used for the second round trip
 * @return the text after both round trips
 * @throws UnsupportedEncodingException if either charset name is not supported
 * @version 1.0
 */
public static String convertCharset(String content, String sourceCharset, String targetCharset) throws UnsupportedEncodingException {
    byte[] sourceBytes = content.getBytes(sourceCharset);
    String decodedFromSource = new String(sourceBytes, sourceCharset);
    byte[] targetBytes = decodedFromSource.getBytes(targetCharset);
    return new String(targetBytes, targetCharset);
}
/**
 * Returns the lower-cased file suffix of a request path when it is a recognised document type.
 * Author: Yang Hailong; created 2015-07-10 10:14:52.
 *
 * <p>The suffix is everything from the last {@code '.'} to the end of the string. Only
 * {@code .pdf}, {@code .doc}, {@code .docx}, {@code .ppt}, {@code .pptx}, {@code .xls} and
 * {@code .xlsx} (compared case-insensitively) are accepted.
 *
 * @param sourceaddress the request path or address
 * @return the lower-cased suffix including the dot, or {@code null} when there is no dot or the
 *         suffix is not one of the accepted types
 * @version 1.0
 */
public static String getFileSuffix(String sourceaddress) {
    int lastDot = sourceaddress.lastIndexOf(".");
    if (lastDot == -1) {
        return null;
    }
    String loweredSuffix = sourceaddress.substring(lastDot).toLowerCase();
    switch (loweredSuffix) {
        case ".pdf":
        case ".doc":
        case ".docx":
        case ".ppt":
        case ".pptx":
        case ".xls":
        case ".xlsx":
            return loweredSuffix;
        default:
            return null;
    }
}
/**
 * Normalises a URI by trimming surrounding whitespace and dropping trailing slashes.
 * Author: Liu Xiaopeng; created 2015-08-20 15:26:00.
 *
 * @param uri the URI text to normalise
 * @return the trimmed URI without any run of {@code '/'} at its end
 * @version 1.0
 */
public static String formatURI(String uri) {
    String trimmed = uri.trim();
    return trimmed.replaceAll("/+$", "");
}
/**
 * Normalises the images in an HTML fragment and then flattens the markup.
 *
 * <p>Every {@code <img>} with a non-empty {@code src} (as judged by {@code isNotEmpty}) gets a
 * fixed inline style; images without one are removed. The whole document's outer HTML is then
 * passed through {@code htmlEscape}, closing {@code </p>} tags are dropped, and the document
 * prologue text is stripped if it is still present.
 *
 * @param contentNoTag the HTML content to process
 * @return the processed markup
 */
public static String dealImg(String contentNoTag) {
    Document document = Jsoup.parse(contentNoTag);
    for (Element img : document.select("img")) {
        if (!isNotEmpty(img.attr("src"))) {
            // An image without a source cannot be displayed; drop it from the document.
            img.remove();
            continue;
        }
        // Fixed width and horizontal margins for every displayed image.
        img.attr("style", "width: 50%;margin-left:23%;margin-right:27%;");
    }
    String flattened = htmlEscape(document.outerHtml());
    String withoutClosingParagraphs = flattened.replace("</p>", "");
    return withoutClosingParagraphs.replaceAll("<html>\\n <head></head>\\n <body> \\n ", "");
}
/**
 * Further processes content that still carries HTML tags.
 *
 * <p>Line breaks ("\r\n" and "\n") are turned into {@code <br/>}, consecutive {@code <br/>} tags
 * are collapsed, and the text is split on {@code <br/>}. Every non-blank section is trimmed,
 * stripped of leading space characters / {@code &nbsp;} entities, and the sections are
 * concatenated without any separator.
 *
 * @param content the tagged content to process
 * @return the concatenated sections
 */
public static String htmlEscape(String content){
if(content.indexOf("\r\n") > -1){
content = content.replaceAll("\r\n", "<br/>");
}
if(content.indexOf('\n') > -1){
content = content.replaceAll("\n", "<br/>");
}
// Collapse runs of <br/> that are separated only by whitespace.
// NOTE(review): the condition strips ALL whitespace before testing, while the replacement only
// matches whitespace between two intact tags; input such as "<br/><b r/>" looks like it would
// never leave this loop - confirm.
while (content.replaceAll("\\s*", "").indexOf("<br/><br/>") > -1) {
content = content.replaceAll("<br/>\\s*<br/>", "<br/>");
}
// Stay compatible with data that was already processed by the old logic.
content = content.replaceAll("</p ><p","</p ><br/><p").replace("  ","");
String[] page = content.split("<br/>");
String convertContent = "";
if(page != null && page.length > 0 ) {
for (String section : page) {
if (StringUtils.isNotBlank(section)) {
section = section.trim();
// Strip leading &nbsp; entities and leading space characters (per the original comment:
// nbsp, en space and em space).
while (section.startsWith("&nbsp;") || section.startsWith(" ") || section.startsWith(" ")
|| section.startsWith(" ") || section.startsWith(" ")) {
if (section.startsWith("&nbsp;")) {
section = section.replaceFirst("&nbsp;", "");
} else {
section = section.substring(1).trim();
}
}
// Sections are joined directly; the <br/> separators are not re-inserted.
convertContent+=section;
}
}
}
return convertContent;
}
/**
 * Tells whether the object carries a usable value.
 *
 * @param object the value to test
 * @return {@code false} when the object is {@code null}, equals the empty string, or equals the
 *         literal string {@code "null"}; {@code true} otherwise
 */
public static boolean isNotEmpty(Object object) {
    if (object == null) {
        return false;
    }
    boolean isEmptyText = object.equals("");
    boolean isNullText = object.equals("null");
    return !(isEmptyText || isNullText);
}
/** Matches a whole script element, tag names in any letter case, including multi-line bodies. */
private static final Pattern SCRIPT_BLOCK_PATTERN =
        Pattern.compile("<script[^>]*>[\\s\\S]*?</script>", Pattern.CASE_INSENSITIVE);

/**
 * Removes every {@code <script>...</script>} element, including its body, from the markup.
 *
 * <p>Matching is case-insensitive, so {@code <SCRIPT>} blocks are removed as well (the previous
 * pattern only matched lower-case tags). The pattern is compiled once instead of on every call.
 *
 * @param contentWithTag HTML content; {@code null} or empty input is returned unchanged
 * @return the content without script elements
 */
public static String removeHTMLScriptLabel(String contentWithTag) {
    if (contentWithTag == null || contentWithTag.isEmpty()) {
        return contentWithTag;
    }
    return SCRIPT_BLOCK_PATTERN.matcher(contentWithTag).replaceAll("");
}
}
......@@ -13,5 +13,7 @@ public class EventRegionVO {
private String eventId;
private String eventName;
private String startTime;
private String endTime;
private String regionName;
}
......@@ -13,6 +13,8 @@ public class EventTopVO {
private String id;
private String eventName;
private String startTime;
private String endTime;
private String publishDate;
private Integer totalHot;
private String typeName;
......
package com.zzsn.event.vo;
import lombok.Data;
import java.util.List;
/**
* Wrapper for export request parameters.
*
* @author lkg
* @date 2024/4/11
*/
@Data
public class ExportParam {
// Event id collection
private List<String> eventIdList;
// Search keyword
private String searchWord;
// Search position (title - title; content - body content)
private String position;
// Match mode (1 - fuzzy; 2 - exact)
private Integer category;
// Article (news item) id collection
private List<String> articleIdList;
// Sort field
private String column;
// Sort direction (asc - ascending; desc - descending)
private String order;
// Number of records to export
private Integer size;
// Export mode (1 - summary; 2 - full text)
private Integer type;
}
package com.zzsn.event.vo;
import lombok.Data;
/**
* Model value object.
*
* @author lkg
* @date 2024/4/11
*/
@Data
public class ModelVO {
// Model identifier
private String id;
// Model name
private String modelName;
// Model type
private String type;
}
......@@ -48,7 +48,7 @@ spring:
url: jdbc:mysql://114.116.44.11:3306/clb_project?useUnicode=true&characterEncoding=utf-8&AllowPublicKeyRetrieval=True&serverTimezone=Asia/Shanghai&autoReconnect=true&rewriteBatchedStatements=true
username: ciglobal
password: qwer@9988&zzsn
# 多数据源配置
# 多数据源配置
multi-datasource1:
url: jdbc:mysql://114.116.44.11:3306/clb_xxl_job?characterEncoding=UTF-8&useUnicode=true&useSSL=false&tinyInt1isBit=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai
username: ciglobal
......@@ -56,7 +56,7 @@ spring:
driver-class-name: com.mysql.cj.jdbc.Driver
elasticsearch:
rest:
uris: ["114.115.215.250:9700","114.116.19.92:9700","114.116.54.108:9200"]
uris: [ "114.115.215.250:9700","114.116.19.92:9700","114.116.54.108:9200" ]
username: elastic
password: zzsn9988
connection-timeout: 300000
......@@ -106,8 +106,6 @@ spring:
key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
#值的反序列化器类,实现类实现了接口org.apache.kafka.common.serialization.Deserializer
value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
thymeleaf:
prefix: classpath:/templates
mybatis-plus:
mapper-locations: classpath*:com/zzsn/event/mapper/xml/*.xml,classpath*:com/zzsn/event/xxljob/mapper/xml/*.xml
......
......@@ -106,8 +106,6 @@ spring:
key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
#值的反序列化器类,实现类实现了接口org.apache.kafka.common.serialization.Deserializer
value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
thymeleaf:
prefix: classpath:/templates
mybatis-plus:
mapper-locations: classpath*:com/zzsn/event/mapper/xml/*.xml,classpath*:com/zzsn/event/xxljob/mapper/xml/*.xml
......
spring:
profiles:
# active: dev
active: pro
\ No newline at end of file
active: dev
# active: pro
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论