#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File    : es_byid.py
# @Time    : 2022/12/27 13:41
# @Author  : bruxelles_li
# @Software: PyCharm
import requests
import json
import pandas as pd


# Look up sentence-level records in Elasticsearch by their unique ids.
def find_sent_info(_id: list):
    """
    Fetch sentence records from the ai_report_material index by their ids.

    Issues one ``terms`` query on the ``id`` field and turns the ``_source``
    of every hit into one DataFrame row.

    :param _id: list of unique record ids, e.g. ["1670844082008296"]
    :return: DataFrame restricted to the columns id, content, paragraphId,
             sentParaIndex, sentArticleIndex, topicType, contentTypeName,
             articleId and sentenceId. Rows follow the order of the search
             hits; ids without a match contribute no row, so the frame may
             be shorter than ``_id`` (an empty frame with the same columns
             is returned when nothing matches).
    :raises requests.HTTPError: if the endpoint answers with an error status
    :raises requests.Timeout: if the endpoint does not answer within 30 s
    :raises KeyError: if hits were returned but one of the expected columns
                      is absent from all of them
    """
    columns = ["id", "content", "paragraphId", "sentParaIndex", "sentArticleIndex", "topicType",
               "contentTypeName", "articleId", "sentenceId"]
    size = len(_id)
    if size == 0:
        # Nothing to look up: skip the round trip and hand back an empty frame.
        return pd.DataFrame(columns=columns)

    # NOTE(review): endpoint and Basic-auth credentials are hard-coded and sent
    # over plain HTTP; consider moving them to configuration.
    url = "http://114.115.215.250:9700/ai_report_material/_search"
    payload = json.dumps({
        "query": {
            "bool": {
                "must": [
                    {
                        "terms": {
                            "id": _id
                        }
                    }
                ]
            }
        },
        "track_total_hits": True,
        # At most one document per requested id is expected.
        "size": size
    })
    headers = {
        'Authorization': 'Basic ZWxhc3RpYzp6enNuOTk4OA==',
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    # Fail loudly on 4xx/5xx instead of hitting a KeyError on "hits" below.
    response.raise_for_status()

    hits = response.json()["hits"]["hits"]
    # Iterate over what was actually returned: the search may yield fewer
    # hits than ids (unknown or duplicated ids), so indexing by len(_id)
    # would overrun the list.
    sources = [hit["_source"] for hit in hits]
    if not sources:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(sources)[columns]


# Look up paragraph-level records in Elasticsearch by their unique ids.
def find_para_info(_id: list):
    """
    Fetch paragraph records from the ai_report_material index by their ids.

    Issues one ``terms`` query on the ``id`` field and turns the ``_source``
    of every hit into one DataFrame row.

    :param _id: list of unique record ids, e.g. ["1670844082074304"]
    :return: DataFrame restricted to the columns id, content, paragraphId,
             paraArticleIndex, topicType, contentTypeName and articleId.
             Rows follow the order of the search hits; ids without a match
             contribute no row, so the frame may be shorter than ``_id``
             (an empty frame with the same columns is returned when nothing
             matches).
    :raises requests.HTTPError: if the endpoint answers with an error status
    :raises requests.Timeout: if the endpoint does not answer within 30 s
    :raises KeyError: if hits were returned but one of the expected columns
                      is absent from all of them
    """
    columns = ["id", "content", "paragraphId", "paraArticleIndex", "topicType", "contentTypeName",
               "articleId"]
    size = len(_id)
    if size == 0:
        # Nothing to look up: skip the round trip and hand back an empty frame.
        return pd.DataFrame(columns=columns)

    # NOTE(review): endpoint and Basic-auth credentials are hard-coded and sent
    # over plain HTTP; consider moving them to configuration.
    url = "http://114.115.215.250:9700/ai_report_material/_search"
    payload = json.dumps({
        "query": {
            "bool": {
                "must": [
                    {
                        "terms": {
                            "id": _id
                        }
                    }
                ]
            }
        },
        "track_total_hits": True,
        # At most one document per requested id is expected.
        "size": size
    })
    headers = {
        'Authorization': 'Basic ZWxhc3RpYzp6enNuOTk4OA==',
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    # Fail loudly on 4xx/5xx instead of hitting a KeyError on "hits" below.
    response.raise_for_status()

    hits = response.json()["hits"]["hits"]
    # Iterate over what was actually returned: the search may yield fewer
    # hits than ids (unknown or duplicated ids), so indexing by len(_id)
    # would overrun the list.
    sources = [hit["_source"] for hit in hits]
    if not sources:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(sources)[columns]


# Given an article id and a sentence id, fetch the sentences immediately before and after it.
def find_sen_content(sent_article_id: str, sentence_id: str, sent_content):
    """
    Return a sentence in context, highlighted between its two neighbours.

    Searches the given article for the records whose ``sentenceId`` equals
    ``sentence_id - 1`` and ``sentence_id + 1`` and concatenates
    previous content + highlighted ``sent_content`` + next content.

    :param sent_article_id: articleId of the article containing the sentence
    :param sentence_id: id of the target sentence; must parse as an integer
    :param sent_content: text of the target sentence; it is inserted verbatim
                         (no HTML escaping) inside a red ``<font>`` tag
    :return: the assembled string; a missing neighbour contributes ""
    :raises ValueError: if ``sentence_id`` is not an integer string
    :raises requests.HTTPError: if the endpoint answers with an error status
    :raises requests.Timeout: if the endpoint does not answer within 30 s
    """
    # Computed up front so a malformed sentence_id fails before any I/O.
    prev_id = str(int(sentence_id) - 1)
    next_id = str(int(sentence_id) + 1)

    # NOTE(review): endpoint and Basic-auth credentials are hard-coded and sent
    # over plain HTTP; consider moving them to configuration.
    url = "http://114.115.215.250:9700/ai_report_material/_search"
    payload = json.dumps({
        "query": {
            "bool": {
                "must": [
                    {
                        "terms": {
                            "articleId": [sent_article_id]
                        }
                    },
                    {
                        "terms": {
                            "sentenceId": [prev_id, next_id]
                        }
                    }
                ]
            }
        },
        "track_total_hits": True
    })
    headers = {
        'Authorization': 'Basic ZWxhc3RpYzp6enNuOTk4OA==',
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    # Fail loudly on 4xx/5xx instead of hitting a KeyError on "hits" below.
    response.raise_for_status()
    hits = response.json()["hits"]["hits"]

    # The query specifies no sort, so the order of the hits says nothing about
    # which one is the previous and which the next sentence. Key them by
    # sentenceId instead of trusting positions 0 and 1.
    # sentenceId is normalised with str() — presumably it is stored as a
    # string already; TODO confirm against the index mapping.
    neighbours = {}
    for hit in hits:
        source = hit["_source"]
        # setdefault keeps the first hit should an id ever appear twice.
        neighbours.setdefault(str(source["sentenceId"]), source["content"])

    pre_content = neighbours.get(prev_id, "")
    suf_content = neighbours.get(next_id, "")

    return pre_content + "<font style='color:red;'>" + sent_content + "</font>" + suf_content


# Look up the article-level record for a given article id.
def find_art_info(article_id: str):
    """
    Fetch the article-level record for one article.

    Searches the ai_report_material index for a document whose ``articleId``
    equals ``article_id`` and whose ``type`` is "art", and returns the
    ``_source`` of the first hit.

    :param article_id: articleId of the article, e.g. "1670829371705726"
    :return: the ``_source`` dict of the matching document
    :raises IndexError: if no matching article document exists
    :raises requests.HTTPError: if the endpoint answers with an error status
    :raises requests.Timeout: if the endpoint does not answer within 30 s
    """
    # NOTE(review): endpoint and Basic-auth credentials are hard-coded and sent
    # over plain HTTP; consider moving them to configuration.
    url = "http://114.115.215.250:9700/ai_report_material/_search"
    payload = json.dumps({
        "query": {
            "bool": {
                "must": [
                    {
                        "term": {
                            "articleId": article_id
                        }
                    },
                    {
                        "term": {
                            "type": "art"
                        }
                    }
                ]
            }
        },
        "track_total_hits": True,
        "size": 1
    })
    headers = {
        'Authorization': 'Basic ZWxhc3RpYzp6enNuOTk4OA==',
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    # Fail loudly on 4xx/5xx instead of hitting a KeyError on "hits" below.
    response.raise_for_status()

    hits = response.json()["hits"]["hits"]
    if not hits:
        # Same exception type the bare hits[0] lookup used to raise, but with
        # a message that says which article was not found.
        raise IndexError(f"no article document found for articleId {article_id!r}")
    return hits[0]["_source"]


if __name__ == "__main__":
    # Manual smoke test against the live index: look up two known sentence ids.
    # The other helpers in this module (find_para_info, find_sen_content,
    # find_art_info) can be exercised the same way.
    sample_ids = ["1670844082008296", "1670844082007284"]
    find_sent_info(sample_ids)
