#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time    : 2021/8/12 19:54
# @Author  : 程婷婷
# @FileName: basic.py
# @Software: PyCharm
import json
import logging
import os

import ahocorasick
import jieba
import jieba.analyse
import jionlp as jio
import pandas as pd
import requests
from gensim.models.keyedvectors import KeyedVectors
from ltp import LTP

from platform_zzsn.settings import BASE_DIR
from platform_base.views import base_utils

# Load the general-purpose dictionary once at import time (the file name
# suggests it is a Sogou word list -- confirm) and flatten it into one string
# in which every stripped entry is preceded by a single space.
# new_words_find() scans this string to decide which tokens are already known.
General_dict = base_utils.read_txt(os.path.join(BASE_DIR, 'static/base/dict_sogou.txt'))
# A single join avoids re-copying the growing string for every dictionary entry.
General_dict_ = ''.join(' ' + str(key.strip()) for key in General_dict)

def word_cut(text):
    """Segment *text* into words with LTP.

    The text is first split into sentences with ``sent_split`` and the
    sentences are then segmented together with ``seg``.

    NOTE(review): a fresh ``LTP()`` instance is constructed on every call.

    :param text: raw text to segment.
    :return: the first element of the pair returned by ``LTP.seg``
        (presumably one token list per sentence -- confirm against the
        installed LTP version).
    """
    model = LTP()
    tokens, _hidden_state = model.seg(model.sent_split([text]))
    return tokens

def word_pos(text):
    """Segment *text* with LTP and tag the result with parts of speech.

    The text is split into sentences, the sentences are segmented, and the
    hidden state produced by the segmentation is passed to ``LTP.pos``.

    NOTE(review): a fresh ``LTP()`` instance is constructed on every call.

    :param text: raw text to analyse.
    :return: a ``(segment, pos)`` tuple -- the segmentation output of
        ``LTP.seg`` and the output of ``LTP.pos`` for the same sentences.
    """
    model = LTP()
    tokens, hidden_state = model.seg(model.sent_split([text]))
    return tokens, model.pos(hidden_state)

class AC_Unicode:
    """Light wrapper around ``ahocorasick.Automaton`` for unicode text.

    Every method delegates directly to the wrapped automaton, which is kept
    in ``self.ac``. Keys and search text are handed over as-is; no UTF-8
    encoding step is applied.
    """

    def __init__(self):
        # The underlying Aho-Corasick automaton all calls are forwarded to.
        self.ac = ahocorasick.Automaton()

    def add_word(self, k, v):
        """Register key *k* with associated value *v*; return the automaton's result."""
        return self.ac.add_word(k, v)

    def make_automaton(self):
        """Finalize the automaton after the keys were added; return its result."""
        return self.ac.make_automaton()

    def iter(self, s):
        """Search *s* for the registered keys and return the automaton's iterator."""
        return self.ac.iter(s)

def new_words_find(text):
    """Tokenize *text* with jieba and flag the tokens that look like new words.

    A token is a candidate when it has at least two characters. A candidate
    is considered known when it occurs anywhere in the flattened general
    dictionary ``General_dict_``; every remaining candidate is flagged as new.

    NOTE(review): the lookup is a substring search over the flattened
    dictionary text, so a token that only appears inside a longer dictionary
    entry also counts as known.

    :param text: raw text to analyse.
    :return: ``(words, sign)`` where ``words`` is the jieba token list (HMM
        enabled) and ``sign`` is a parallel list holding ``1`` for tokens
        flagged as new words and ``0`` otherwise.
    """
    words = list(jieba.cut(text, HMM=True))
    candidates = {word for word in words if len(word) >= 2}
    if not candidates:
        # Without candidates the automaton would have no keys, and
        # pyahocorasick cannot search a keyless automaton. Nothing can be
        # flagged anyway, so answer directly.
        return words, [0] * len(words)

    ac = AC_Unicode()
    for word in candidates:
        ac.add_word(word, word)
    ac.make_automaton()

    # One dictionary hit marks the word as known for all of its occurrences
    # in the text, no matter how often it is repeated there.
    known = {key for _, key in ac.iter(General_dict_)}
    new_words = candidates - known

    sign = [1 if word in new_words else 0 for word in words]
    return words, sign

def show_srl(text):
    """Run LTP semantic role labelling on every sentence of *text*.

    The text is split into sentences; each sentence is segmented on its own
    and its hidden state is passed to ``LTP.srl`` with ``keep_empty=False``.

    NOTE(review): a fresh ``LTP()`` instance is constructed on every call.

    :param text: raw text to analyse.
    :return: ``(sentences_seg_dict, sentences_srl_dict)``. Both dicts share
        the same keys, built from the literal prefix meaning "sentence", the
        1-based sentence number, a full-width colon and the sentence text.
        Values are the first element of the segmentation output and of the
        SRL output for that sentence.
    """
    model = LTP()
    seg_by_sentence = {}
    srl_by_sentence = {}
    for number, sentence in enumerate(model.sent_split([text]), start=1):
        tokens, hidden_state = model.seg([sentence])
        roles = model.srl(hidden_state, keep_empty=False)
        # Same label is used as the key in both result dicts.
        label = '句子' + str(number) + '：' + str(sentence)
        seg_by_sentence[label] = tokens[0]
        srl_by_sentence[label] = roles[0]
    return seg_by_sentence, srl_by_sentence

def show_dep(text):
    """Run LTP dependency parsing on every sentence of *text*.

    The text is split into sentences; each sentence is segmented on its own
    and its hidden state is passed to ``LTP.dep``.

    NOTE(review): a fresh ``LTP()`` instance is constructed on every call.

    :param text: raw text to analyse.
    :return: ``(sentences_seg_dict, sentences_dep_dict)``. Both dicts share
        the same keys, built from the literal prefix meaning "sentence", the
        1-based sentence number, a full-width colon and the sentence text.
        Values are the first element of the segmentation output and of the
        dependency output for that sentence.
    """
    model = LTP()
    seg_by_sentence = {}
    dep_by_sentence = {}
    for number, sentence in enumerate(model.sent_split([text]), start=1):
        tokens, hidden_state = model.seg([sentence])
        arcs = model.dep(hidden_state)
        # Same label is used as the key in both result dicts.
        label = '句子' + str(number) + '：' + str(sentence)
        seg_by_sentence[label] = tokens[0]
        dep_by_sentence[label] = arcs[0]
    return seg_by_sentence, dep_by_sentence

def create_keywords(text: str, topK: int, with_weight: bool) -> list:
    """Extract key phrases from *text* with jionlp.

    Thin wrapper around ``jio.keyphrase.extract_keyphrase``.

    :param text: text to extract key phrases from.
    :param topK: forwarded as ``top_k`` (number of phrases requested).
    :param with_weight: forwarded as ``with_weight``.
    :return: whatever ``extract_keyphrase`` returns for these arguments.
    """
    logger = logging.getLogger(__name__)
    # Diagnostics go through logging instead of print() so they do not
    # clutter the process's stdout and can be switched off by configuration.
    logger.debug('create_keywords: topK type=%s', type(topK))
    keywords = jio.keyphrase.extract_keyphrase(text, top_k=topK, with_weight=with_weight)
    logger.debug('create_keywords: keywords=%s', keywords)
    return keywords

def ner(text):
    """Run LTP named-entity recognition on *text*.

    The whole text is segmented as a single item (no sentence splitting) and
    the resulting hidden state is passed to ``LTP.ner``.

    NOTE(review): a fresh ``LTP()`` instance is constructed on every call.

    :param text: raw text to analyse.
    :return: ``(seg, entity)`` -- the first element of the segmentation
        output and the first element of the NER output.
    """
    model = LTP()
    tokens, hidden_state = model.seg([text])
    entities = model.ner(hidden_state)
    return tokens[0], entities[0]

def _get_word_vectors():
    """Return the module-level word-vector ``model``, loading it on first use."""
    global model
    try:
        # Honour a model that was already assigned to this module.
        return model
    except NameError:
        # No module-level ``model`` exists yet, so load it lazily rather than
        # failing with NameError on the first request.
        # NOTE(review): path and format follow the load statement that was
        # left commented out in this module -- confirm the file is deployed.
        model = KeyedVectors.load_word2vec_format(
            os.path.join(BASE_DIR, 'static/platform_base/Tencent_AILab_ChineseEmbedding.bin'),
            binary=True,
        )
        return model


def related_word_recommendation(words, word_num):
    """Recommend words related to the given comma-separated *words*.

    :param words: comma-separated query words; surrounding whitespace is
        stripped and empty entries are ignored.
    :param word_num: number of results requested; converted with ``int``.
    :return: the result of ``most_similar_cosmul`` for the query words
        (gensim's multiplicative cosine-similarity ranking).
    """
    logger = logging.getLogger(__name__)
    logger.debug('related_word_recommendation: words=%s', words)
    positive = [word.strip() for word in words.split(',') if word.strip()]
    result = _get_word_vectors().most_similar_cosmul(positive, topn=int(word_num))
    logger.debug('related_word_recommendation: result=%s', result)
    return result

def post_similarity(url, text_1, text_2, sim_algorithm_name, timeout=60):
    """POST two texts to a similarity service and return its decoded JSON reply.

    :param url: endpoint of the similarity service.
    :param text_1: first text to compare.
    :param text_2: second text to compare.
    :param sim_algorithm_name: algorithm name, sent under the
        ``sim_algorithm_name`` key.
    :param timeout: timeout passed to ``requests`` (seconds, or a
        ``(connect, read)`` tuple). Without one, a stalled service would
        block the caller indefinitely.
    :return: the response body parsed as JSON.
    :raises requests.exceptions.RequestException: on connection failure or
        timeout.
    :raises ValueError: if the response body is not valid JSON.
    """
    payload = {'text_1': text_1, 'text_2': text_2, 'sim_algorithm_name': sim_algorithm_name}
    headers = {'Content-Type': 'application/json'}
    response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=timeout)
    return json.loads(response.text)

def summary(text, summary_length):
    """Produce an extractive summary of *text* with jionlp.

    Thin wrapper around ``jio.summary.extract_summary``.

    :param text: text to summarise.
    :param summary_length: passed positionally as the second argument of
        ``extract_summary``.
    :return: whatever ``extract_summary`` returns for these arguments.
    """
    return jio.summary.extract_summary(text, summary_length)
# zh_nlp = stanza.Pipeline('zh-hans')
# en_nlp = stanza.Pipeline('en')
# nlp_dict = {'zh': zh_nlp, 'en': en_nlp}
#model = KeyedVectors.load_word2vec_format(os.path.join(BASE_DIR, 'static/platform_base/Tencent_AILab_ChineseEmbedding.bin'), binary=True)
# if __name__ == '__main__':
    # print(word_cut('汤姆生病了。他去了医院。'))
    # print(word_pos('汤姆生病了。他去了医院。'))
    # print(new_words_find('白月光，形容的是一种可望不可即的人或者事物，虽然一直在心上，却从不在身边。'))
    # print(new_words_find('爷青回，表示爷的青春又回来了，爷表示的是自己，将自己的身份地位抬高一个档次，像我是你大爷一样，通常用来形容那些知名的人、经典的动画、影视、游戏剧等重新复出或者是回归。'))
    # show_srl('他叫汤姆去拿外衣。')
    # print(show_dep('他叫汤姆去拿外衣。'))
