#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File    : search_by_dot_matrix.py
# @Time    : 2022/6/21 15:44
# @Author  : bruxelles_li
# @Software: PyCharm


"""
    pip install bert-serving-server && pip install bert-serving-client
"""
import time
from multiprocessing.pool import Pool
import datetime
import threading
import os, re
import multiprocessing as mp
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from bert_serving.client import BertClient
from es_byid import find_sent_info, find_para_info, find_sen_content, find_art_info
from tqdm import tqdm
from numpy import *
import numpy as np
import logging
from 文章id生成 import create_title_id

from 缓存处理 import memory_cache
# Module-level multiprocessing queues used to hand results back from worker processes.
# `q` is the general-purpose queue (presumably the one passed to the *_top workers — confirm against callers).
q = mp.Queue()
# Paragraph de-duplication results (written by get_para_max_id workers).
q1 = mp.Queue()
# Sentence de-duplication results (written by get_sen_max_id workers).
q2 = mp.Queue()
# Shared lock; the functions visible here accept it as a parameter but never acquire it.
lock = mp.Lock()

# todo: 根据某列的属性值获取数据 -> df.loc[df['columnName'] == 'the value']
# todo: 限制线程的最大数量为4个
# sem = threading.Semaphore(4)  # 限制线程的最大数量为4个
# 为了控制最大线程数，达到最大线程时应在线程外阻塞，有线程结束后再创建新线程
# 多进程


# record = []

# Configure root logging once at import time; the format includes the process and
# thread name so output from worker processes can be told apart.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %('
                                               'message)s')
logger = logging.getLogger(__name__)
# Maps the English content-type key to the Chinese display name used as the key of
# the per-type matrix dictionaries defined below.
type2namedict = {
            "speech_by_leaders": "领导讲话",
            "policy_document": "政策文件",
            "expert_opinion": "专家观点",
            "enterprise_case": "企业案例",
            "other": "其他"
          }

# TODO: call the BERT encoding service  "114.116.54.108"
# NOTE(review): no `ip` argument is passed here, so the client's default host is used — confirm.
bc = BertClient(check_length=False, port_out=5556, port=5555)
# Similarity thresholds for retrieval (para_prob is presumably meant for paragraphs,
# sent_prob for sentences — both are 0.75 today).
para_prob = 0.75
sent_prob = 0.75
# TODO: de-duplication threshold — a best match at or above this value counts as a duplicate.
duplicated_prob = 0.97
# TODO: paragraph temporary-file path and file path (legacy, kept for reference)
# para_vector_path = "database/para_database/para_vector_path/other.txt"
# temp_para_vector_path = "database/para_database/para_vector_path/temp_other.txt"
# temp_para_np_path = "database/para_database/para_vector_path/temp_other.npy"
# TODO: sentence temporary-file path and file path (legacy, kept for reference)
# sen_vector_path = "database/sent_database/sen_vector_path/other.txt"
# temp_sen_vector_path = "database/sent_database/sen_vector_path/temp_other.txt"
# temp_sen_np_path = "database/sent_database/sen_vector_path/temp_other.npy"
# TODO: paragraph matrix file paths.
# Every matrix below is loaded into memory at import time. In each matrix column 0 is
# the record id and the remaining columns are the embedding vector.
# para_np_path1 = "database/para_database/speech_by_leaders.npy"
para_np_path1 = "database/para_database/update_leaders.npy"
para_np_arrary1 = np.load(para_np_path1)
# para_np_path2 = "database/para_database/expert_opinion.npy"
para_np_path2 = "database/para_database/update_expert.npy"
para_np_arrary2 = np.load(para_np_path2)
# para_np_path3 = "database/para_database/policy_document.npy"
para_np_path3 = "database/para_database/update_policy.npy"
para_np_arrary3 = np.load(para_np_path3)
para_np_path4 = "database/para_database/enterprise_case.npy"
para_np_arrary4 = np.load(para_np_path4)
para_np_path5 = "database/para_database/other_para.npy"
para_np_arrary5 = np.load(para_np_path5)
# para_np_path_list = [para_np_path1, para_np_path2, para_np_path3, para_np_path4, para_np_path5]
# para_np_arrary_list = [para_np_arrary1, para_np_arrary2, para_np_arrary3, para_np_arrary4, para_np_arrary5]
# TODO: content-type display name -> paragraph matrix file path
para_name2para_path = {
    "领导讲话": para_np_path1,
    "专家观点": para_np_path2,
    "政策文件": para_np_path3,
    "企业案例": para_np_path4,
    "其他": para_np_path5
}
# Content-type display name -> paragraph matrix already loaded in memory
para_name2para_np_arrary = {
    "领导讲话": para_np_arrary1,
    "专家观点": para_np_arrary2,
    "政策文件": para_np_arrary3,
    "企业案例": para_np_arrary4,
    "其他": para_np_arrary5
}
# TODO: sentence matrix file paths (same layout as the paragraph matrices)
# sen_np_path1 = "database/sent_database/speech_by_leaders.npy"
sen_np_path1 = "database/sent_database/update_leaders.npy"
sen_np_arrary1 = np.load(sen_np_path1)
# sen_np_path2 = "database/sent_database/expert_opinion.npy"
sen_np_path2 = "database/sent_database/update_expert.npy"
sen_np_arrary2 = np.load(sen_np_path2)
# sen_np_path3 = "database/sent_database/policy_document.npy"
sen_np_path3 = "database/sent_database/update_policy.npy"
sen_np_arrary3 = np.load(sen_np_path3)
sen_np_path4 = "database/sent_database/enterprise_case.npy"
sen_np_arrary4 = np.load(sen_np_path4)
sen_np_path5 = "database/sent_database/other_sent.npy"
sen_np_arrary5 = np.load(sen_np_path5)
# sen_np_path_list = [sen_np_path1, sen_np_path2, sen_np_path3, sen_np_path4, sen_np_path5]
# sen_np_arrary_list = [sen_np_arrary1, sen_np_arrary2, sen_np_arrary3, sen_np_arrary4, sen_np_arrary5]
# TODO: content-type display name -> sentence matrix file path
sen_name2sen_path = {
    "领导讲话": sen_np_path1,
    "专家观点": sen_np_path2,
    "政策文件": sen_np_path3,
    "企业案例": sen_np_path4,
    "其他": sen_np_path5
}
# Content-type display name -> sentence matrix already loaded in memory
sen_name2sen_np_arrary = {
    "领导讲话": sen_np_arrary1,
    "专家观点": sen_np_arrary2,
    "政策文件": sen_np_arrary3,
    "企业案例": sen_np_arrary4,
    "其他": sen_np_arrary5
}


def save_file(length, vector_path, np_path):
    """Convert a text file of vectors into a ``.npy`` matrix.

    Each line of ``vector_path`` holds 769 space-separated numbers (the
    writers in this module emit the record id followed by the embedding).
    Line *i* is stored in row *i* of a ``(length, 769)`` float matrix, which
    is then written to ``np_path`` with ``np.save``.

    Args:
        length: Number of rows to allocate. Rows beyond the number of lines
            in the file stay all-zero.
        vector_path: Path of the text file to read.
        np_path: Destination path handed to ``np.save``.

    Returns:
        None.

    Raises:
        IndexError: If the file contains more lines than ``length``.
        ValueError: If a field cannot be parsed as a float or a line does not
            fit the 769-column row.
    """
    matrix = np.zeros((int(length), 769), dtype=float)
    # The vector files are written as utf-8 by the callers, so read them the
    # same way, and make sure the handle is closed even if parsing fails.
    with open(vector_path, encoding='utf-8') as vector_file:
        for row_index, line in enumerate(vector_file):
            fields = line.strip('\n').split(' ')
            matrix[row_index, :] = [float(value) for value in fields]
    np.save(np_path, matrix)
    return None


# TODO: add paragraph material to the library
def put_para_list(para_list):
    """Encode new paragraphs and append them to the "other" paragraph matrix.

    Rows whose ``is_main`` equals ``"0"`` are skipped. Every remaining row is
    encoded with the BERT client and written as ``"<id> <v1> ... <v768>"`` to
    a temporary text file, which ``save_file`` turns into a matrix. That
    matrix is concatenated onto the matrix stored at ``para_np_path5`` and
    saved back to the same path. Both temporary files are removed afterwards.

    Args:
        para_list: Iterable of dicts with the keys ``"para_content"``,
            ``"id"`` (a string) and ``"is_main"``.

    Returns:
        The string ``"1"`` on completion.

    NOTE(review): the in-memory copy ``para_np_arrary5`` loaded at import time
    is not refreshed here, so it presumably stays stale until the module is
    reloaded — confirm whether that is intended.
    """
    # Load the current "other" library matrix from disk.
    para_arrary = np.load(para_np_path5)
    temp_para_vector_path = "database/para_database/temp_other.txt"
    temp_para_np_path = "database/para_database/temp_other.npy"
    # Count only the rows actually written: sizing the matrix by
    # len(para_list) would leave one all-zero row (id 0) in the library for
    # every skipped entry.
    written = 0
    with open(temp_para_vector_path, "w", encoding='utf-8') as f_vectors:
        for row in tqdm(para_list):
            content = row["para_content"]
            _id = row["id"]
            is_main = row["is_main"]
            if is_main == "0":
                continue
            vector = bc.encode([content])
            f_vectors.write(_id + ' ' + ' '.join(map(str, vector[0])) + '\n')
            written += 1
    save_file(written, temp_para_vector_path, temp_para_np_path)
    temp_arrary = np.load(temp_para_np_path)
    arr = np.concatenate((para_arrary, temp_arrary), axis=0)
    np.save(para_np_path5, arr)
    flag = "1"
    os.remove(temp_para_vector_path)
    os.remove(temp_para_np_path)
    return flag


# TODO: add sentence material to the library
def put_sen_list(sen_list):
    """Encode new sentences and append them to the "other" sentence matrix.

    Rows whose ``is_main`` equals ``"0"`` are skipped. Every remaining row is
    encoded with the BERT client and written as ``"<id> <v1> ... <v768>"`` to
    a temporary text file, which ``save_file`` turns into a matrix. That
    matrix is concatenated onto the matrix stored at ``sen_np_path5`` and
    saved back to the same path. Both temporary files are removed afterwards.

    Args:
        sen_list: Iterable of dicts with the keys ``"sent_content"``,
            ``"id"`` (a string) and ``"is_main"``.

    Returns:
        The string ``"1"`` on completion.

    NOTE(review): the in-memory copy ``sen_np_arrary5`` loaded at import time
    is not refreshed here, so it presumably stays stale until the module is
    reloaded — confirm whether that is intended.
    """
    # Load the current "other" sentence library matrix from disk.
    sen_arrary = np.load(sen_np_path5)
    temp_sen_vector_path = "database/sent_database/temp_other.txt"
    temp_sen_np_path = "database/sent_database/temp_other.npy"
    # Count only the rows actually written: sizing the matrix by
    # len(sen_list) would leave one all-zero row (id 0) in the library for
    # every skipped entry.
    written = 0
    with open(temp_sen_vector_path, "w", encoding='utf-8') as f_vectors:
        for row in tqdm(sen_list):
            content = row["sent_content"]
            _id = row["id"]
            is_main = row["is_main"]
            if is_main == "0":
                continue
            vector = bc.encode([content])
            f_vectors.write(_id + ' ' + ' '.join(map(str, vector[0])) + '\n')
            written += 1
    save_file(written, temp_sen_vector_path, temp_sen_np_path)
    temp_arrary = np.load(temp_sen_np_path)
    arr = np.concatenate((sen_arrary, temp_arrary), axis=0)
    # Save back to the SENTENCE library. Writing to para_np_path5 here would
    # overwrite the paragraph library with sentence vectors and never update
    # the sentence library.
    np.save(sen_np_path5, arr)
    flag = "1"
    os.remove(temp_sen_vector_path)
    os.remove(temp_sen_np_path)
    return flag


# # todo: 定义段落并行去重判断
# def get_para_max_id(np_path, text_encode, q1):
#     logger.info("子进程开始执行>>> pid={}".format(os.getpid()))
#     a = text_encode
#     # 导入初始矩阵
#     b = np.load(np_path)
#     # todo: 将初始矩阵转换为目标矩阵，通过先转置，后按行切片获得目标子矩阵,然后对子矩阵再次转置得到
#     c = b.transpose()
#     d = c[1::].transpose()
#     # todo: 此时，id_list(对应从0-N的矩阵索引)可根据第一次转置后的第一行获得
#     id_list = c[0].tolist()
#     # todo: 将矩阵索引与id_list通过定义id_dict关联
#     # 根据行长度初始化矩阵索引np_list
#     np_list = [n for n in range(b.shape[0])]
#     id_dict = dict(zip(np_list, id_list))
#     s = cosine_similarity(a, d)
#     temp = np.max(s)
#     m = np.argmax(s)
#     r, c = divmod(m, s.shape[1])
#     if temp >= duplicated_prob:
#         _id = str(id_dict[c]).split(".")[0]
#         q1.put(_id)
#         return _id

# todo: 定义段落并行去重判断

def get_para_max_id(np_arrary, text_encode_list, q1):
    """Find, for each query vector, the stored paragraph it duplicates (if any).

    Runs as the target of a worker process. For every row of
    ``text_encode_list`` the best cosine similarity against the library is
    taken; when it reaches ``duplicated_prob`` the id of that library row is
    reported, otherwise an empty string.

    Args:
        np_arrary: 2-D library matrix; column 0 holds the record id and the
            remaining columns hold the embedding vector.
        text_encode_list: 2-D array of query embeddings, one row per text.
        q1: Queue on which the resulting list is put for the parent process.

    Returns:
        A list with one entry per query row: the matching id as a string
        (text before the first ``"."`` of ``str(id)``) or ``""``.

    NOTE(review): ids are stored as floats in the matrix, so very large ids
    may lose precision or print in scientific notation — confirm the id range.
    """
    logger.info("子进程开始执行>>> pid={}".format(os.getpid()))
    # Split the library into its id column and its embedding columns.
    library_ids = np_arrary[:, 0].tolist()
    library_vectors = np_arrary[:, 1:]
    similarity = cosine_similarity(text_encode_list, library_vectors)
    # Best library row per query. The column is looked up by position rather
    # than through a {similarity: column} dict, because two queries with the
    # same best score would otherwise overwrite each other's column.
    best_columns = np.argmax(similarity, axis=1)
    best_values = similarity[np.arange(similarity.shape[0]), best_columns]

    id_list = []
    for sim, column in zip(best_values.tolist(), best_columns.tolist()):
        if sim >= duplicated_prob:
            _id = str(library_ids[column]).split(".")[0]
        else:
            _id = ""
        id_list.append(_id)

    q1.put(id_list)
    return id_list


# # todo: 定义句子并行去重判断
# def get_sen_max_id(np_path, text_encode, q2):
#     logger.info("子进程开始执行>>> pid={}".format(os.getpid()))
#     a = text_encode
#     # 导入初始矩阵
#     b = np.load(np_path)
#     # todo: 将初始矩阵转换为目标矩阵，通过先转置，后按行切片获得目标子矩阵,然后对子矩阵再次转置得到
#     c = b.transpose()
#     d = c[1::].transpose()
#     # todo: 此时，id_list(对应从0-N的矩阵索引)可根据第一次转置后的第一行获得
#     id_list = c[0].tolist()
#     # todo: 将矩阵索引与id_list通过定义id_dict关联
#     # 根据行长度初始化矩阵索引np_list
#     np_list = [n for n in range(b.shape[0])]
#     id_dict = dict(zip(np_list, id_list))
#     s = cosine_similarity(a, d)
#     temp = np.max(s)
#     m = np.argmax(s)
#     r, c = divmod(m, s.shape[1])
#     if temp >= duplicated_prob:
#         _id = str(id_dict[c]).split(".")[0]
#         q2.put(_id)
#         return _id


# todo: 定义段落列表去重函数

# todo: 定义句子并行去重判断
def get_sen_max_id(np_arrary, text_encode_list, q2):
    """Find, for each query vector, the stored sentence it duplicates (if any).

    Runs as the target of a worker process. For every row of
    ``text_encode_list`` the best cosine similarity against the library is
    taken; when it reaches ``duplicated_prob`` the id of that library row is
    reported, otherwise an empty string.

    Args:
        np_arrary: 2-D library matrix; column 0 holds the record id and the
            remaining columns hold the embedding vector.
        text_encode_list: 2-D array of query embeddings, one row per text.
        q2: Queue on which the resulting list is put for the parent process.

    Returns:
        A list with one entry per query row: the matching id as a string
        (text before the first ``"."`` of ``str(id)``) or ``""``.

    NOTE(review): ids are stored as floats in the matrix, so very large ids
    may lose precision or print in scientific notation — confirm the id range.
    """
    logger.info("子进程开始执行>>> pid={}".format(os.getpid()))
    # Split the library into its id column and its embedding columns.
    library_ids = np_arrary[:, 0].tolist()
    library_vectors = np_arrary[:, 1:]
    similarity = cosine_similarity(text_encode_list, library_vectors)
    # Best library row per query. The column is looked up by position rather
    # than through a {similarity: column} dict, because two queries with the
    # same best score would otherwise overwrite each other's column.
    best_columns = np.argmax(similarity, axis=1)
    best_values = similarity[np.arange(similarity.shape[0]), best_columns]

    id_list = []
    for sim, column in zip(best_values.tolist(), best_columns.tolist()):
        if sim >= duplicated_prob:
            _id = str(library_ids[column]).split(".")[0]
        else:
            _id = ""
        id_list.append(_id)

    q2.put(id_list)
    return id_list


def get_para_duplicated(para_list):
    """Mark paragraphs that duplicate material already held in the libraries.

    All paragraph texts are encoded in one batch, then one worker process per
    content type (``get_para_max_id``) compares them with that type's
    paragraph matrix. The ids reported by the workers are joined with ``";"``
    into ``repeatedId``; a paragraph with a non-empty ``repeatedId`` gets
    ``is_main = "0"``, otherwise ``is_main = ""``.

    Args:
        para_list: List of dicts with at least the keys ``para_content``,
            ``para_id``, ``infoId``, ``para_index``, ``contentTypeIds``,
            ``contentNames``, ``topicNames`` and ``type``.

    Returns:
        A list with one dict per input paragraph, in input order, carrying the
        fields above plus ``create_time``, ``repeatedId`` and ``is_main``.
        An empty input yields an empty list.
    """
    import queue  # function-scope: only needed for the queue.Empty exception

    if not para_list:
        return []

    dup_record = []
    dup_para_list = []
    df = pd.DataFrame(para_list)
    content_list = df["para_content"].tolist()
    # Encode once in the parent; the workers only do the matrix comparison.
    encode_content_list = bc.encode(content_list)
    contentTypeName_list = ["领导讲话", "专家观点", "政策文件", "企业案例", "其他"]
    logger.info("主进程开始执行>>> pid={}".format(os.getpid()))
    start_t = datetime.datetime.now()
    for type_name in contentTypeName_list:
        # Paragraphs are compared with the PARAGRAPH matrices; using the
        # sentence matrices here would never match stored paragraphs.
        np_arrary = para_name2para_np_arrary[type_name]
        process = mp.Process(target=get_para_max_id, args=(
            np_arrary, encode_content_list, q1))
        process.start()
        dup_record.append(process)

    # Collect one result list per worker. Results are read BEFORE join() so a
    # worker never blocks on a full queue, and every item is drained so that
    # nothing is left in the shared queue for the next call.
    results = []
    while len(results) < len(dup_record):
        # Liveness is sampled before the wait: if every worker had already
        # exited and the queue is still empty afterwards, nothing more can come.
        all_exited = not any(process.is_alive() for process in dup_record)
        try:
            results.append(q1.get(timeout=1))
        except queue.Empty:
            if all_exited:
                break
    logger.info(results)

    for process in dup_record:
        process.join()

    logger.info("主进程终止")
    end_t = datetime.datetime.now()
    elapsed_sec = (end_t - start_t).total_seconds()
    logger.info("多进程计算 共消耗: " + "{:.2f}".format(elapsed_sec) + " 秒")

    # Join the non-empty ids reported for each paragraph across ALL workers
    # (five content types, not four), without stray ";" separators.
    final_id_list = [
        ";".join(ids[position] for ids in results if ids[position])
        for position in range(len(content_list))
    ]

    # Pair results with rows by position; merging on the text would multiply
    # rows whenever the same paragraph text occurs more than once.
    for (idx, row), repeated_id in tqdm(zip(df.iterrows(), final_id_list)):
        dup_para_list.append({
            "create_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            "para_id": row["para_id"],
            "infoId": row["infoId"],
            "para_index": row["para_index"],
            "para_content": row["para_content"],
            "contentTypeIds": row["contentTypeIds"],
            "contentNames": row["contentNames"],
            "topicNames": row["topicNames"],
            "type": row["type"],
            "repeatedId": repeated_id,
            "is_main": "0" if repeated_id else ""
        })

    return dup_para_list

    # for row in tqdm(para_list):
    #     content = row["para_content"]
    #     # todo: 考虑编码服务在并行计算时不友好，先对待匹配文本进行编码
    #     text_encode = bc.encode([content])
    #     # todo: 根据字典获取contentTypeName
    #     contentTypeName_list = ["领导讲话", "专家观点", "政策文件", "企业案例", "其他"]
    #     # todo: 应用并发方式处理文件
    #     logger.info("主进程开始执行>>> pid={}".format(os.getpid()))
    #     start_t = datetime.datetime.now()
    #
    #     for type_name in contentTypeName_list:
    #         np_path = para_name2para_path[type_name]
    #         logger.info(np_path)
    #         process = mp.Process(target=get_para_max_id, args=(
    #         np_path, text_encode, q1))
    #         process.start()
    #         dup_record.append(process)
    #
    #     # 获取结果
    #     results = []
    #     for record_item in dup_record:
    #         while record_item.is_alive():
    #             while False == q1.empty():
    #                 temp_results = q1.get()
    #                 results.append(temp_results)
    #     # todo: 定义并行处理结果list
    #     res_list = []
    #     for res in results:
    #         res_list.append(res)
    #     logger.info(res_list)
    #
    #     for process in dup_record:
    #         process.join()
    #         process.terminate()
    #
    #     logger.info("主进程终止")
    #     end_t = datetime.datetime.now()
    #     elapsed_sec = (end_t - start_t).total_seconds()
    #     logger.info("多进程计算 共消耗: " + "{:.2f}".format(elapsed_sec) + " 秒")
    #     # todo: 追加结果， 其中非 0 为主条目
    #     if res_list:
    #         repeatedId = ";".join(res_list)
    #         is_main = "0"
    #     else:
    #         repeatedId = ""
    #         is_main = ""
    #
    #     dup_para_list.append({
    #         "create_time": time.strftime("%Y-%m-%d %H:%M:%S"),
    #         "para_id": row["para_id"],
    #         "infoId": row["infoId"],
    #         "para_index": row["para_index"],
    #         "para_content": row["para_content"],
    #         "contentTypeIds": row["contentTypeIds"],
    #         "contentNames": row["contentNames"],
    #         "topicNames": row["topicNames"],
    #         "type": row["type"],
    #         "repeatedId": repeatedId,
    #         "is_main": is_main
    #     })
    #
    # return dup_para_list


# TODO: sentence-list de-duplication
def get_sen_duplicated(sen_list):
    """Mark sentences that duplicate material already held in the libraries.

    All sentence texts are encoded in one batch, then one worker process per
    content type (``get_sen_max_id``) compares them with that type's sentence
    matrix. The ids reported by the workers are joined with ``";"`` into
    ``repeatedId``; a sentence with a non-empty ``repeatedId`` gets
    ``is_main = "0"``, otherwise ``is_main = ""``.

    Args:
        sen_list: List of dicts with at least the keys ``sent_content``,
            ``sent_id``, ``para_id``, ``infoId``, ``sent_article_index``,
            ``sent_para_index``, ``contentTypeIds``, ``contentNames``,
            ``topicNames`` and ``type``.

    Returns:
        A list with one dict per input sentence, in input order, carrying the
        fields above plus ``create_time``, ``repeatedId`` and ``is_main``.
        An empty input yields an empty list.
    """
    import queue  # function-scope: only needed for the queue.Empty exception

    if not sen_list:
        return []

    sen_dup_record = []
    dup_sen_list = []
    df = pd.DataFrame(sen_list)
    content_list = df["sent_content"].tolist()
    # Encode once in the parent; the workers only do the matrix comparison.
    encode_content_list = bc.encode(content_list)
    contentTypeName_list = ["领导讲话", "专家观点", "政策文件", "企业案例", "其他"]
    logger.info("主进程开始执行>>> pid={}".format(os.getpid()))
    start_t = datetime.datetime.now()
    for type_name in contentTypeName_list:
        np_arrary = sen_name2sen_np_arrary[type_name]
        process = mp.Process(target=get_sen_max_id, args=(
            np_arrary, encode_content_list, q2))
        process.start()
        sen_dup_record.append(process)

    # Collect one result list per worker. Results are read BEFORE join() so a
    # worker never blocks on a full queue, and every item is drained so that
    # nothing is left in the shared queue for the next call.
    results = []
    while len(results) < len(sen_dup_record):
        # Liveness is sampled before the wait: if every worker had already
        # exited and the queue is still empty afterwards, nothing more can come.
        all_exited = not any(process.is_alive() for process in sen_dup_record)
        try:
            results.append(q2.get(timeout=1))
        except queue.Empty:
            if all_exited:
                break

    for process in sen_dup_record:
        process.join()

    logger.info("主进程终止")
    end_t = datetime.datetime.now()
    elapsed_sec = (end_t - start_t).total_seconds()
    logger.info("多进程计算 共消耗: " + "{:.2f}".format(elapsed_sec) + " 秒")

    # Join the non-empty ids reported for each sentence across ALL workers
    # (five content types, not four), without stray ";" separators.
    final_id_list = [
        ";".join(ids[position] for ids in results if ids[position])
        for position in range(len(content_list))
    ]

    # Pair results with rows by position; merging on the text would multiply
    # rows whenever the same sentence text occurs more than once.
    for (idx, row), repeated_id in tqdm(zip(df.iterrows(), final_id_list)):
        dup_sen_list.append({
            "create_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            "sent_id": row["sent_id"],
            "para_id": row["para_id"],
            "infoId": row["infoId"],
            "sent_article_index": row["sent_article_index"],
            "sent_para_index": row["sent_para_index"],
            "sent_content": row["sent_content"],
            "contentTypeIds": row["contentTypeIds"],
            "contentNames": row["contentNames"],
            "topicNames": row["topicNames"],
            "type": row["type"],
            "repeatedId": repeated_id,
            "is_main": "0" if repeated_id else ""
        })
    return dup_sen_list

    #     if res_list:
    #         repeatedId = ";".join(res_list)
    #         is_main = "0"
    #     else:
    #         repeatedId = ""
    #         is_main = ""
    #
    #     dup_sen_list.append({
    #         "create_time": time.strftime("%Y-%m-%d %H:%M:%S"),
    #         "sent_id": row["sent_id"],
    #         "para_id": row["para_id"],
    #         "infoId": row["infoId"],
    #         "sent_article_index": row["sent_article_index"],
    #         "sent_para_index": row["sent_para_index"],
    #         "sent_content": row["sent_content"],
    #         "contentTypeIds": row["contentTypeIds"],
    #         "contentNames": row["contentNames"],
    #         "topicNames": row["topicNames"],
    #         "type": row["type"],
    #         "repeatedId": repeatedId,
    #         "is_main": is_main
    #     })
    #
    # return dup_sen_list


# TODO: paragraph similarity search
def get_para_top(np_arrary, text_encode, topicTypeNames, pStartTime, pEndTime, returenType, q, lock):
    """Return the stored paragraphs most similar to the query text.

    The library matrix is scanned in up to 500 row-chunks. Each chunk is
    compared with the query by cosine similarity and every entry scoring at
    least ``para_prob`` is kept. Scanning stops as soon as 30 hits have been
    collected, so the hits are the first qualifying ones in matrix order, not
    necessarily the global best. The hits are sorted by similarity, truncated
    (10 without a topic filter, 30 with one), enriched with paragraph and
    article details from Elasticsearch, and optionally filtered by topic.

    Args:
        np_arrary: 2-D library matrix; column 0 holds the record id and the
            remaining columns hold the embedding vector.
        text_encode: 2-D array with the query embedding(s).
        topicTypeNames: Collection of topic names to keep; empty means no
            topic filtering.
        pStartTime: Unused by this function.
        pEndTime: Unused by this function.
        returenType: Value copied into the ``"type"`` field of every result.
        q: Queue on which the result list is put for the parent process.
        lock: Unused by this function.

    Returns:
        The list of result dicts (also put on ``q``), or ``None`` when
        ``find_para_info`` raises ``IndexError`` — in that case nothing is
        put on ``q``.
    """
    logger.info("子进程开始执行>>> pid={}".format(os.getpid()))
    start0_time = datetime.datetime.now()
    sim_result = []
    id_result = []
    # Chunking keeps each similarity computation small; stop once enough hits exist.
    for chunk in np.array_split(np_arrary, 500):
        if len(sim_result) >= 30:
            break
        # A library with fewer than 500 rows yields empty chunks, and
        # cosine_similarity rejects an input with zero samples.
        if chunk.shape[0] == 0:
            continue
        chunk_ids = chunk[:, 0].tolist()
        scores = cosine_similarity(text_encode, chunk[:, 1:])
        hits = np.where(scores >= para_prob)
        hit_columns = hits[1].tolist()
        if hit_columns:
            # Ids are stored as floats; keep the part before the decimal point.
            id_result.extend(str(chunk_ids[column]).split(".")[0] for column in hit_columns)
            sim_result.extend(scores[hits].tolist())
    end0_time = datetime.datetime.now()
    total0_time = (end0_time - start0_time).total_seconds()
    logger.info(len(id_result))
    logger.info("拆分矩阵计算 共消耗: " + "{:.2f}".format(total0_time) + " 秒")

    df1 = pd.DataFrame({
        "id": id_result,
        "sim": sim_result
    })
    ranked = df1.sort_values(by=['sim'], axis=0, ascending=False)
    # With a topic filter more candidates are fetched, since some are dropped below.
    df2 = ranked[:30] if topicTypeNames else ranked[:10]

    # Look up paragraph details for the candidate ids in Elasticsearch.
    new_id_list = df2["id"].tolist()
    try:
        info_df = find_para_info(new_id_list)
    except IndexError:
        return None
    temp_df = pd.merge(df2, info_df, on="id")

    result = []
    for idx, row in tqdm(temp_df.iterrows()):
        _id = row['id']
        para_content = row["content"]
        paragraphid = row["paragraphId"]
        paraindex = row["paraArticleIndex"]
        para_topic_type = row["topicType"]
        para_content_type_name = row["contentTypeName"]
        para_article_id = row["articleId"]
        # Drop rows outside the requested topics before querying the article,
        # so no lookup is spent on a row that would be discarded anyway.
        if topicTypeNames and para_topic_type not in topicTypeNames:
            continue
        try:
            art_temp_result = find_art_info(para_article_id)
        except IndexError:
            continue
        title = art_temp_result["articleTitle"]
        origin = art_temp_result["origin"]
        publish_time = art_temp_result["articleTime"]
        author = art_temp_result["author"]
        article_content = art_temp_result["content"]
        result.append({
            "content": "<font style='color:red;'>" + para_content + "</font>",
            "similarity": round(row['sim'], 4),
            "id": _id,
            "article_id": para_article_id,
            "paragraphid": paragraphid,
            "match_index": paraindex,
            "topic_type": para_topic_type,
            "content_type_name": para_content_type_name,
            "article_content": article_content,
            "publishDate": publish_time,
            # The author is only reported when a topic filter is active.
            "author": author if topicTypeNames else "",
            "origin": origin,
            "title": title,
            "type": returenType
        })
    q.put(result)
    return result


# todo: 定义句子的相似性判断函数
def get_sen_top(np_arrary, text_encode, topicTypeNames, pStartTime, pEndTime, returenType, q, lock):
    """Worker: find stored sentences similar to the query and enrich them via ES.

    The matrix is scanned in slices; every row whose cosine similarity with
    ``text_encode`` reaches the module-level ``sent_prob`` threshold is a hit.
    The best hits are looked up with ``find_sent_info`` / ``find_art_info`` /
    ``find_sen_content`` and turned into result records.

    Args:
        np_arrary: 2-D array; column 0 holds the sentence id, the remaining
            columns hold the sentence vector.
        text_encode: encoded query, passed as the first argument of
            ``cosine_similarity``.
        topicTypeNames: topic names to keep. When empty, no topic filter is
            applied and only the top 10 hits are looked up (top 30 otherwise).
        pStartTime: currently unused.
        pEndTime: currently unused.
        returenType: value copied verbatim into the ``"type"`` field of
            every record.
        q: queue on which the list of records is published.
        lock: currently unused.

    Returns:
        The list of result records (the same object that was put on ``q``),
        or ``None`` when ``find_sent_info`` raises ``IndexError`` (in that
        case nothing is put on ``q``).
    """
    logger.info("子进程开始执行>>> pid={}".format(os.getpid()))

    # The matrix is cut into slices so each similarity computation stays small;
    # scanning stops once enough hits have been gathered.
    chunk_count = 500
    enough_hits = 30

    start0_time = datetime.datetime.now()
    sim_result = []
    id_result = []
    for chunk in np.array_split(np_arrary, chunk_count):
        if len(sim_result) >= enough_hits:
            break
        # array_split yields empty slices when the matrix has fewer rows than
        # chunk_count; cosine_similarity rejects zero-sample input.
        if chunk.shape[0] == 0:
            continue
        row_ids = chunk[:, 0].tolist()
        vectors = chunk[:, 1:]
        scores = cosine_similarity(text_encode, vectors)
        hits = np.where(scores >= sent_prob)
        hit_columns = hits[1].tolist()
        if hit_columns:
            # Ids are stored as numbers in the matrix; drop the fractional part.
            # NOTE(review): assumes str() never renders an id in exponent
            # notation (very large ids would) - confirm against the id range.
            id_result.extend(str(row_ids[i]).split(".")[0] for i in hit_columns)
            sim_result.extend(scores[hits].tolist())
    total0_time = (datetime.datetime.now() - start0_time).total_seconds()
    logger.info(len(id_result))
    logger.info("拆分矩阵计算 共消耗: " + "{:.2f}".format(total0_time) + " 秒")

    ranked = pd.DataFrame({
        "id": id_result,
        "sim": sim_result
    }).sort_values(by=['sim'], axis=0, ascending=False)
    # With a topic filter some hits will be discarded, so look up more of them.
    df2 = ranked[:30] if topicTypeNames else ranked[:10]

    # Fetch the sentence metadata for the selected ids from ES.
    new_id_list = df2["id"].tolist()
    start2_time = datetime.datetime.now()
    try:
        info_df = find_sent_info(new_id_list)
    except IndexError:
        logger.warning("find_sent_info raised IndexError; no sentence results for this matrix")
        return None

    temp_df = pd.merge(df2, info_df, on="id")
    result = []
    for idx, row in tqdm(temp_df.iterrows()):
        topic_type = row["topicType"]
        # Filter before querying ES so discarded rows cost no round trips.
        # NOTE(review): assumes the find_* helpers are read-only lookups.
        if topicTypeNames and topic_type not in topicTypeNames:
            continue

        sentence_id = row["sentenceId"]
        sent_article_id = row["articleId"]
        sent_content = row["content"]
        # Article-level information for the article that contains the sentence.
        try:
            art_temp_result = find_art_info(sent_article_id)
        except IndexError:
            continue
        title = art_temp_result["articleTitle"]
        origin = art_temp_result["origin"]
        publish_time = art_temp_result["articleTime"]
        author = art_temp_result["author"]
        article_content = art_temp_result["content"]

        # Content to display, built by find_sen_content from the sentence and
        # its position in the article.
        final_content = find_sen_content(sent_article_id, sentence_id, sent_content)
        result.append({
            "content": final_content,
            "similarity": round(row['sim'], 4),
            "id": row["id"],
            "article_id": sent_article_id,
            "paragraphid": row["paragraphId"],
            "match_index": row["sentParaIndex"] + ";" + row["sentArticleIndex"],
            "topic_type": topic_type,
            # Emitted in both modes so every record has the same shape.
            "content_type_name": row["contentTypeName"],
            "article_content": article_content,
            "publishDate": publish_time,
            "author": author,
            "origin": origin,
            "title": title,
            "type": returenType
        })
    total2_time = (datetime.datetime.now() - start2_time).total_seconds()
    logger.info("es查询内容 共消耗: " + "{:.2f}".format(total2_time) + " 秒")
    q.put(result)

    return result


def _collect_similarity_matches(worker, arrays_by_type, text, contentTypeFlags, topicTypeNames,
                                pStartTime, pEndTime, returenType):
    """Run ``worker`` in one process per content type and merge the results.

    Shared implementation of ``get_para_result`` and ``get_sent_result``.

    Args:
        worker: process target (``get_para_top`` or ``get_sen_top``).
        arrays_by_type: mapping from content-type display name to the matrix
            handed to ``worker``.
        text: query text; encoded once here, before any process is started.
        contentTypeFlags: keys of ``type2namedict``; empty means all types.
        topicTypeNames, pStartTime, pEndTime, returenType: forwarded to
            ``worker`` unchanged.

    Returns:
        ``(records, count)`` - at most 100 result dicts, de-duplicated on
        ``"content"``, and the length of that list.

    Raises:
        KeyError: if a flag in ``contentTypeFlags`` is not in ``type2namedict``.
    """
    # Local import keeps this block self-contained; Empty is what
    # multiprocessing queues raise when get() times out.
    from queue import Empty

    # Encode in the parent: the original notes the encoding service does not
    # behave well under parallel use.
    text_encode = bc.encode([text])

    # Map the requested flags to display names (all types when none given).
    if contentTypeFlags:
        contentTypeName_list = [type2namedict[flag] for flag in contentTypeFlags]
    else:
        contentTypeName_list = ["领导讲话", "专家观点", "政策文件", "企业案例", "其他"]
    logger.info(contentTypeName_list)

    logger.info("主进程开始执行>>> pid={}".format(os.getpid()))
    start_t = datetime.datetime.now()

    record = []
    for type_name in contentTypeName_list:
        process = mp.Process(
            target=worker,
            args=(arrays_by_type[type_name], text_encode, topicTypeNames,
                  pStartTime, pEndTime, returenType, q, lock))
        process.start()
        record.append(process)

    # Consume results while workers run: a worker cannot exit until its queued
    # payload has been flushed, so the queue must be read before join().
    # The short timeout replaces a busy spin on q.empty().
    results = []
    while any(process.is_alive() for process in record):
        try:
            results.append(q.get(timeout=0.05))
        except Empty:
            pass
    # A worker may publish its result and exit between two polls; drain what
    # is left so nothing is lost or leaks into the next call via the shared
    # module-level queue.
    while True:
        try:
            results.append(q.get_nowait())
        except Empty:
            break

    res_list = []
    for res in results:
        if res:
            res_list.extend(res)

    # All workers have already exited; join() just reaps them.
    for process in record:
        process.join()
    logger.info("主进程终止")
    elapsed_sec = (datetime.datetime.now() - start_t).total_seconds()
    logger.info("多进程计算 共消耗: " + "{:.2f}".format(elapsed_sec) + " 秒")

    df2 = pd.DataFrame(res_list, columns=["content", "similarity", "id", "article_id", "paragraphid", "match_index", "topic_type", "content_type_name",
                                          "article_content", "publishDate", "author", "origin", "title", "type"])
    df2 = df2.drop_duplicates(subset=["content"], keep="first")
    # Back to a list of per-row dicts, capped at 100 entries.
    result_list = df2.to_dict(orient="records")[:100]
    return result_list, len(result_list)


def get_para_result(text: str, contentTypeFlags: list, topicTypeNames: list, pStartTime: str, pEndTime: str, pageSize: int, pageNo: int, returenType: str):
    """Search stored paragraphs similar to ``text``.

    Starts one ``get_para_top`` process per selected content type over the
    matrices in ``para_name2para_np_arrary`` and merges their results.

    Args:
        text: query text.
        contentTypeFlags: keys of ``type2namedict``; empty means all types.
        topicTypeNames: topic names forwarded to the worker.
        pStartTime: forwarded to the worker.
        pEndTime: forwarded to the worker.
        pageSize: currently unused.
        pageNo: currently unused.
        returenType: forwarded to the worker.

    Returns:
        ``(records, count)`` - at most 100 result dicts, de-duplicated on
        ``"content"``, and the length of that list.
    """
    return _collect_similarity_matches(get_para_top, para_name2para_np_arrary, text, contentTypeFlags,
                                       topicTypeNames, pStartTime, pEndTime, returenType)


def get_sent_result(text: str, contentTypeFlags: list, topicTypeNames: list, pStartTime: str, pEndTime: str, pageSize: int, pageNo: int, returenType: str ):
    """Search stored sentences similar to ``text``.

    Starts one ``get_sen_top`` process per selected content type over the
    matrices in ``sen_name2sen_np_arrary`` and merges their results.

    Args:
        text: query text.
        contentTypeFlags: keys of ``type2namedict``; empty means all types.
        topicTypeNames: topic names forwarded to the worker.
        pStartTime: forwarded to the worker.
        pEndTime: forwarded to the worker.
        pageSize: currently unused.
        pageNo: currently unused.
        returenType: forwarded to the worker.

    Returns:
        ``(records, count)`` - at most 100 result dicts, de-duplicated on
        ``"content"``, and the length of that list.
    """
    return _collect_similarity_matches(get_sen_top, sen_name2sen_np_arrary, text, contentTypeFlags,
                                       topicTypeNames, pStartTime, pEndTime, returenType)


if __name__ == "__main__":
    # Manual check: feed five sample sentence records to get_sen_duplicated
    # (defined elsewhere in this file) and print the text of the entry at
    # index 4 of what it returns.
    #
    # Each tuple is (create_time, sent_id, index span, sentence text); the
    # span is used for both the article-level and paragraph-level index.
    samples = [
        ("2023-01-03 18:02:26", "1", "2|87",
         "强化创新引领 加快“三个转变” 更好推动中国制造高质量发展——国资委党委委员、副主任 翁杰明——制造业是立国之本、强国之基，以习近平同志为核心的党中央高度重视制造强国建设。"),
        ("2023-01-03 18:02:28", "2", "88|155",
         "2014年5月10日，习近平总书记在中铁装备视察时首次提出，推动中国制造向中国创造转变、中国速度向中国质量转变、中国产品向中国品牌转变。"),
        ("2023-01-03 18:02:30", "3", "156|223",
         "习近平总书记关于“三个转变”的重要指示为中国制造高质量发展指明了方向、提供了根本遵循，国务院国资委和中央企业深入学习领会、坚决贯彻落实。"),
        ("2023-01-03 18:02:32", "4", "224|409",
         "近年来，国务院国资委专门出台质量品牌工作系列文件，强化考核引导激励，建立长效机制，搭建了“中国品牌论坛”、“数字中国建设峰会”、“双创”示范基地等一系列助力企业高质量发展的专业化平台，引导支持中央企业积极打造原创技术“策源地”和现代产业链“链长”，有效推动中央企业激发创新潜力、增强发展动力，在高端装备制造领域取得一系列突破性、标志性重大成果，一大批“国之重器”横空出世。"),
        ("2023-01-03 18:02:34", "5", "410|537",
         "无论是代表国家实力的天宫探梦、嫦娥奔月、北斗导航，还是捍卫国家主权的航空母舰、东风导弹、歼20、运20，无论是享誉“一带一路”的中国桥、中国路、中国港，还是成为中国名片的高速铁路、华龙一号、5G通信网络，中央企业都发挥了重要作用，彰显了大国重器的责任担当。"),
    ]
    # Expand the table into full records; every sample shares the same
    # paragraph, content type and topic.
    sent_list = [
        {
            "create_time": created_at,
            "sent_id": sent_id,
            "para_id": "1",
            "infoId": "",
            "sent_article_index": span,
            "sent_para_index": span,
            "sent_content": content,
            "contentTypeIds": "1602095566267805697",
            "contentNames": "领导讲话",
            "topicNames": "产业链链长",
            "type": "sen",
        }
        for created_at, sent_id, span, content in samples
    ]
    deduplicated = get_sen_duplicated(sent_list)
    temp_result = deduplicated[4]["sent_content"]
    print(temp_result)

    # Alternative manual check (disabled): run a sentence search.
    # result_list, len_list = get_sent_result(
    #     "共同富裕", [], [], "", "", 10, 1, "par")
    # print(result_list)








