# -*- coding: utf-8 -*-
# @Time : 2022/9/22 11:08
# @Author : ctt
# @File : data_building
# @Project : 研究中心知识图谱
import mysql.connector
import pandas as pd
import logging
from snow_id import Snow
# Configure root logging for the module: timestamp, level, process name,
# thread name, then the message itself.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] <%(processName)s> (%(threadName)s) %(message)s',
)
# Module-level logger used by the Data class below.
logger = logging.getLogger(__name__)


class Data:
    """Thin MySQL access layer used to read articles/entities and write links.

    Reads come from the ``ai_report_material`` and ``graph_entity`` tables;
    writes go to ``core_base_data_entity`` and ``graph_entity_entity``.
    """

    # NOTE(review): credentials are hard-coded in source. They are kept as
    # defaults for backward compatibility, but should be moved to
    # configuration / environment and passed in via ``Data(**overrides)``.
    DEFAULT_DB_CONFIG = {
        'host': '114.115.159.144',
        'user': 'root',
        'passwd': 'zzsn9988',
        'database': 'clb_project',
        'auth_plugin': 'mysql_native_password',
    }

    def __init__(self, **db_overrides):
        """Create the helper.

        :param db_overrides: optional keyword overrides for the connection
            settings (``host``, ``user``, ``passwd``, ``database``,
            ``auth_plugin``, ...). With no arguments the built-in defaults
            are used, exactly as before.
        """
        self.db_config = dict(self.DEFAULT_DB_CONFIG, **db_overrides)

    def initialize(self):
        """Load the entity table into ``self.entity_df``.

        Opens a connection, stores it on ``self.database`` for the duration
        of the load, and always closes it afterwards (also on failure).
        """
        self.database = self.connect()
        try:
            self.entity_df = self.get_data_base_entity()
            print('==========数据获取完毕=============')
        finally:
            self.database.close()

    def connect(self):
        """Connect to the database.

        :return: a new ``mysql.connector`` connection; the caller owns it
            and is responsible for closing it.
        """
        # Fall back to the class defaults if __init__ was bypassed
        # (e.g. by a subclass that does not call super().__init__()).
        config = getattr(self, 'db_config', self.DEFAULT_DB_CONFIG)
        return mysql.connector.connect(**config)

    @staticmethod
    def _fetch_dataframe(connection, sql, columns):
        """Run a SELECT on ``connection`` and return all rows as a str-typed DataFrame."""
        cursor = connection.cursor()
        try:
            cursor.execute(sql)
            rows = cursor.fetchall()
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
        return pd.DataFrame(rows, columns=columns, dtype=str)

    def _run_write(self, sql, params, many=False):
        """Execute one write statement on a fresh connection, commit, and clean up."""
        connection = self.connect()
        try:
            cursor = connection.cursor()
            try:
                if many:
                    cursor.executemany(sql, params)
                else:
                    cursor.execute(sql, params)
                connection.commit()
            finally:
                cursor.close()
        finally:
            # Previously the connection was never closed, leaking one
            # connection per insert call.
            connection.close()

    def get_article_data(self, database_conntect):
        """Fetch the articles of type ``'par'`` from ``ai_report_material``.

        :param database_conntect: an open database connection; it is used
            but not closed by this method.
        :return: DataFrame with columns ``id``, ``article_title``,
            ``content``, ``origin``, ``article_time`` (all as ``str``).
        """
        article_sql = '''select id, article_title, content, origin, article_time from ai_report_material  WHERE type='par' '''
        return self._fetch_dataframe(
            database_conntect,
            article_sql,
            ['id', 'article_title', 'content', 'origin', 'article_time'],
        )

    def get_data_base_entity(self):
        """Fetch every row of ``graph_entity`` through ``self.database``.

        :return: DataFrame with columns ``id``, ``compound_word``,
            ``label_uuid`` (all as ``str``).
        """
        entity_sql = '''SELECT id, compound_word,label_uuid FROM graph_entity;'''
        return self._fetch_dataframe(
            self.database,
            entity_sql,
            ['id', 'compound_word', 'label_uuid'],
        )

    def insert_entity(self, bid, eids):
        """Link one ``bid`` to each entity id in ``core_base_data_entity``.

        A fresh id is generated per row via ``Snow.get_guid()``; rows are
        inserted with ``status = 0`` using ``INSERT IGNORE``.

        :param bid: id stored in the ``bid`` column of every inserted row.
        :param eids: iterable of entity ids, one row is inserted per item.
        """
        insert_data = []
        for eid in eids:
            row_id = Snow.get_guid()
            logger.info(row_id)
            insert_data.append((row_id, bid, eid))
        if not insert_data:
            # Nothing to insert: avoid opening a connection for a no-op.
            return
        sql = 'insert ignore into core_base_data_entity (id, bid, eid, status) values (%s, %s, %s, 0)'
        self._run_write(sql, insert_data, many=True)

    def insert_relation(self, relation_id, source_id, target_id, bid):
        """Insert one row into ``graph_entity_entity`` with ``status = 0``.

        The values are passed to the driver as query parameters instead of
        being interpolated into the SQL text, so they are quoted/escaped
        correctly and cannot alter the statement.

        :param relation_id: value for the ``relation_id`` column.
        :param source_id: value for the ``source_id`` column.
        :param target_id: value for the ``target_id`` column.
        :param bid: value for the ``bid`` column.
        """
        row_id = Snow.get_guid()
        print((row_id, relation_id, source_id, target_id, bid, 0))
        sql = 'insert into graph_entity_entity (id, relation_id, source_id, target_id, bid, status) ' \
              'values (%s, %s, %s, %s, %s, 0)'
        params = (row_id, relation_id, source_id, target_id, bid)
        logger.info('%s | params=%s', sql, params)
        self._run_write(sql, params)
# Module-level script: fetch the article table once and print it together
# with its row count. The names below stay module-level globals.
data = Data()
database_conntect = data.connect()
try:
    article_df = data.get_article_data(database_conntect)
    print(article_df)
    print(len(article_df))
finally:
    # Release the connection even if the query or the printing fails.
    database_conntect.close()
# article_df.to_excel(r'数据2022-03.xlsx', index=False)
