# -*- coding: utf-8 -*-
import time
import requests
import urllib3
from retry import retry
from selenium import webdriver
from bs4 import BeautifulSoup
from classtool import Token, sendData, Driver, Login
from base import BaseCore
from enterprise_tyc.getTycId import getTycIdByDB

"""变更记录"""
# baseCore = BaseCore.BaseCore(sqlflg=False)
# Shared project helper: the code below takes its logger, Redis client (`r`),
# proxy lookup (`get_proxy`) and database handles from this single instance.
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
# Project helpers from `classtool`; `token` is used to update the cookie status
# after a failure, `edge` creates the browser driver, `login` performs the login.
token = Token()
edge = Driver()
login = Login()
# Suppress urllib3's InsecureRequestWarning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# NOTE(review): the names are crossed on purpose or by accident -- `cnx_`/`cursor_`
# are bound to baseCore.cnx/cursor while `cnx`/`cursor` are bound to
# baseCore.cnx_/cursor_. Which database each pair targets is not visible here;
# confirm against BaseCore. Only `cursor` is referenced in the visible code.
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor

cnx = baseCore.cnx_
cursor = baseCore.cursor_
# Task label; in the visible code it is only referenced by the commented-out
# recordLog call inside doJob.
taskType = "天眼查/变更记录"


@retry(tries=5, delay=5)
def getJson(url, headers, s):
    """Request ``url`` through a proxy and return the ``data`` member of its JSON body.

    The whole function is wrapped by ``@retry(tries=5, delay=5)``, so any
    exception raised here triggers another attempt, and every attempt asks
    ``baseCore.get_proxy()`` for a proxy again.

    Args:
        url: Endpoint to request.
        headers: Request headers forwarded to ``s.get``.
        s: Session-like object exposing ``get`` (the caller passes the session
            returned by ``login.login``).

    Returns:
        The value stored under ``data`` in the decoded JSON response.

    Raises:
        RuntimeError: If the response's ``errorCode`` is not ``0``.
        KeyError: If the response lacks ``errorCode`` or ``data``.
        Exception: Whatever ``s.get`` / ``.json()`` raise; all of these reach
            the caller only once the retry attempts are used up.
    """
    ip = baseCore.get_proxy()
    log.info(f'当前使用的ip是{ip}')
    # Tuple timeout is (connect, read) in requests' convention -- assumes `s`
    # is a requests session; confirm against Login.login.
    req = s.get(url, headers=headers, proxies=ip, timeout=(5, 10))
    try:
        dataJson = req.json()
    finally:
        # Release the connection even when the body is not valid JSON.
        req.close()
    errorCode = dataJson['errorCode']
    if errorCode != 0:
        # The previous bare `raise` had no active exception and surfaced as an
        # uninformative RuntimeError; keep the type, add the diagnostic.
        raise RuntimeError(f'unexpected errorCode {errorCode!r} returned by {url}')
    return dataJson['data']


def _plainParagraphs(html):
    """Return ``html`` rebuilt as bare ``<p>`` tags holding only each paragraph's text.

    Every ``<p>`` found in ``html`` is re-created without attributes or nested
    markup. A ``None`` or empty value produces an empty string instead of
    making the parser raise.
    """
    parsed = BeautifulSoup(html or '', 'lxml')
    rebuilt = BeautifulSoup('', 'lxml')
    for pTag in parsed.find_all('p'):
        newPTag = rebuilt.new_tag('p')
        newPTag.string = pTag.text
        rebuilt.append(newPTag)
    return str(rebuilt)


def doJob():
    """Scrape the change records of a queued company and push them downstream.

    Flow of one loop iteration:
      1. Log in via ``login.login``; retry immediately when no cookie id comes back.
      2. Pop one entry from the Redis list ``ChangeRecordEnterprise:gnqy_socialCode``
         (2 second block); sleep one hour when the list is empty.
      3. Resolve the company's tycId with ``getTycIdByDB``; loop again when none
         is found.
      4. Page through the ``changeinfoEm`` endpoint, normalise the before/after
         HTML to plain ``<p>`` tags and send all records with ``sendData``.
      5. On any exception: update the token status, log, and put the entry back
         into the Redis list.

    NOTE(review): the trailing ``break`` ends the function after the first
    company that reaches step 4 (whether it succeeds or fails). Only login
    failures, an empty queue and a missing tycId loop again. This may be a
    debugging leftover -- confirm before turning this into an endless worker.

    Returns:
        None.
    """
    redisKey = 'ChangeRecordEnterprise:gnqy_socialCode'
    pageSize = 10
    baseHeaders = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'version': 'TYC-Web'
    }

    driver = edge.create_driver()
    try:
        driver.get('https://www.tianyancha.com/')
        driver.maximize_window()
        while True:
            driver, id_cookie, s, update_headers = login.login(driver)
            if not id_cookie:
                continue
            # Start from a fresh copy so headers from an earlier login never leak in.
            headers = dict(baseHeaders)
            headers.update(update_headers)

            # Take the next social credit code from Redis.
            info = baseCore.r.blpop([redisKey], 2)
            if not info:
                log.info('数据已全部采集完')
                time.sleep(60 * 60)
                continue
            # blpop yields (key, value); the value is bytes unless the client
            # was created with decode_responses=True, so accept both.
            info = info[1]
            if isinstance(info, bytes):
                info = info.decode()
            start = time.time()
            socialCreditCode = info.split('|')[0]
            try:
                tycId = getTycIdByDB(socialCreditCode, cursor, start, info, s)
                if not tycId:
                    continue

                def pageUrl(page):
                    return f'https://capi.tianyancha.com/cloud-company-background/company/changeinfoEm?gid={tycId}&pageNum={page}&pageSize={pageSize}&changeItem=-100'

                datasJson = getJson(pageUrl(1), headers, s)
                total = datasJson['total']
                # Integer ceiling division: number of pages needed for `total` rows.
                totalPage = (total + pageSize - 1) // pageSize
                dics = []
                for page in range(1, totalPage + 1):
                    # Page 1 was already fetched to learn `total`.
                    if page != 1:
                        datasJson = getJson(pageUrl(page), headers, s)
                    for dataJson in datasJson['result']:
                        dics.append({
                            "changeDate": dataJson['changeTime'],  # change time
                            "changeItem": dataJson['changeItem'],  # changed item
                            "changeBefore": _plainParagraphs(dataJson['contentBefore']),  # value before the change
                            "changeAfter": _plainParagraphs(dataJson['contentAfter']),  # value after the change
                            "socialCreditCode": socialCreditCode  # social credit code
                        })
                    # Pause between page requests.
                    time.sleep(5)
                if dics:
                    req = sendData('http://114.115.236.206:8088/sync/changeRecord', dics)
                    takeTime = baseCore.getTimeCost(start, time.time())
                    log.info(f'{socialCreditCode}==={req.text}===耗时{takeTime}')
                else:
                    log.info(f'{socialCreditCode}===变更信息为空')
            except Exception as e:
                # Status code 3 is defined by classtool.Token -- meaning not
                # visible here; TODO confirm.
                token.updateTokeen(id_cookie, 3)
                log.info(f'==={socialCreditCode}=====企业变更记录采集失败===重新放入redis====')
                log.info(e)
                # Put the entry back so it is picked up again later.
                baseCore.rePutIntoR(redisKey, info)
                time.sleep(5)

            break
    finally:
        # Close the browser on every exit path; assumes the object is a
        # Selenium WebDriver (it is already used through get/maximize_window).
        driver.quit()




# Script entry point: run the collector only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    doJob()
