#!/usr/bin/env python
# coding=utf-8
"""
@project : 500_资讯
@author  : bruxelles_li
@file   : lac_ner_text.py
@ide    : PyCharm
@time   : 2022-07-04 09:19:43
"""
from LAC import LAC
import pandas as pd
import tqdm
import re
# Module-level LAC analyzer, constructed once when this module is imported.
lac = LAC(mode="lac")


def lac_username(sentences):
    """Extract the person names that LAC tags in the given text.

    Args:
        sentences: Text passed straight to ``lac.run``. The result is read as
            a ``[words, tags]`` pair, which is assumed to be what LAC returns
            for a single string -- confirm before passing a list of strings.

    Returns:
        list: Every word whose tag is ``"PER"``, in order of appearance
        (duplicates are kept).
    """
    # Reuse the analyzer created at module import; constructing a new LAC
    # object on every call reloads the model for no benefit.
    lac_result = lac.run(sentences)
    words, tags = lac_result[0], lac_result[1]
    return [word for word, tag in zip(words, tags) if tag == "PER"]


def lac_organize_name(sentences):
    """Extract the organization names that LAC tags in the given text.

    Args:
        sentences: Text passed straight to ``lac.run``. The result is read as
            a ``[words, tags]`` pair, which is assumed to be what LAC returns
            for a single string -- confirm before passing a list of strings.

    Returns:
        list: Every word whose tag is ``"ORG"``, in order of appearance
        (duplicates are kept).
    """
    # Reuse the analyzer created at module import; constructing a new LAC
    # object on every call reloads the model for no benefit.
    lac_result = lac.run(sentences)
    words, tags = lac_result[0], lac_result[1]
    return [word for word, tag in zip(words, tags) if tag == "ORG"]


def lac_location_name(sentences):
    """Extract the location names that LAC tags in the given text.

    Args:
        sentences: Text passed straight to ``lac.run``. The result is read as
            a ``[words, tags]`` pair, which is assumed to be what LAC returns
            for a single string -- confirm before passing a list of strings.

    Returns:
        list: Every word whose tag is ``"LOC"``, in order of appearance
        (duplicates are kept).
    """
    # Reuse the analyzer created at module import; constructing a new LAC
    # object on every call reloads the model for no benefit.
    lac_result = lac.run(sentences)
    words, tags = lac_result[0], lac_result[1]
    return [word for word, tag in zip(words, tags) if tag == "LOC"]


def match_text_one(rule, text):
    """Replace every match of a ';'-separated rule string in *text* with "A".

    Rule syntax, as implemented below: alternatives are separated by ``;``;
    ``.``, ``*``, ``(`` and ``)`` are matched literally; ``+`` stands for
    "one or more arbitrary characters" (regex ``.+``). Any other regex
    metacharacter is handed to ``re`` unchanged.

    The compiled pattern and the rewritten text are printed as debug output.

    Args:
        rule: Rule string; leading/trailing newlines are ignored.
        text: String to rewrite.

    Returns:
        str: *text* with each match replaced by ``"A"``, or *text* unchanged
        when the rule contains no non-empty alternative.
    """
    # An empty alternative (empty rule, trailing ';', ';;') would compile to a
    # pattern that matches the empty string at every position and scatter "A"
    # through the whole text, so such alternatives are dropped.
    alternatives = [part for part in rule.strip('\n').split(';') if part]
    if not alternatives:
        return text

    # Single-pass character translation; raw strings avoid the invalid
    # '\.'-style escape sequences that newer Python versions warn about.
    escapes = str.maketrans({
        '.': r'\.',
        '*': r'\*',
        '(': r'\(',
        ')': r'\)',
        '+': '.+',
    })
    pattern = re.compile('|'.join(alternatives).translate(escapes))
    print(pattern)
    match_result = pattern.sub("A", text)
    print(match_result)
    return match_result


def mask_entities(text, entities, placeholder):
    """Replace every literal occurrence of the given entities in *text*.

    Args:
        text: String to rewrite.
        entities: Iterable of strings to mask. Each one is matched literally
            (regex metacharacters are escaped); surrounding whitespace is
            stripped and empty entries are ignored.
        placeholder: Literal string inserted in place of each match.

    Returns:
        str: *text* with each entity occurrence replaced by *placeholder*,
        or *text* unchanged when there is nothing to mask.
    """
    cleaned = {entity.strip() for entity in entities}
    cleaned.discard("")
    if not cleaned:
        return text
    # Longest first, so an entity that is a prefix of another one cannot win
    # the alternation and leave the tail of the longer entity unmasked.
    ordered = sorted(cleaned, key=lambda entity: (-len(entity), entity))
    pattern = re.compile('|'.join(re.escape(entity) for entity in ordered))
    # A callable replacement keeps the placeholder literal (no backslash or
    # group-reference expansion).
    return pattern.sub(lambda _match: placeholder, text)


if __name__ == '__main__':
    # Path of the Excel workbook to process; must be filled in before running.
    # The sheet is expected to provide a 'title' column.
    text_path = ""
    data_df = pd.read_excel(text_path, nrows=1).astype(str)
    result_list = []
    for _, row in tqdm.tqdm(data_df.iterrows(), total=len(data_df)):
        title = row['title']
        # All three entity lists are extracted from the untouched title.
        a_user = lac_username(title)
        a_organize = lac_organize_name(title)
        a_location = lac_location_name(title)

        # Each entity type is masked independently: persons -> A,
        # organizations -> B, locations -> C. Previously the organization and
        # location steps were nested under the person check, so a title
        # without a person name was never masked at all.
        title = mask_entities(title, a_user, 'A')
        title = mask_entities(title, a_organize, 'B')
        title = mask_entities(title, a_location, 'C')

        row['title'] = title
        result_list.append(row)

    # Printed once after the loop instead of re-printing the growing list on
    # every iteration.
    print(result_list)








