# -*- coding: utf-8 -*-
import json

import requests, time
from bs4 import BeautifulSoup
import urllib3
from retry import retry

from base import BaseCore
from classtool import Token, sendData, Driver, Login
from enterprise_tyc.getTycId import getTycIdByDB

# Shared project helper object; the code below takes its logger, Redis client
# (`baseCore.r`) and database connection/cursor handles from it.
baseCore = BaseCore.BaseCore()
# Suppress urllib3's InsecureRequestWarning output.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
log = baseCore.getLogger()
# Project helpers from `classtool`: `token.updateTokeen(...)`,
# `edge.create_driver()` and `login.login(driver)` are called by doJob().
token = Token()
edge = Driver()
login = Login()
# NOTE(review): `cnx_`/`cursor_` here are bound to baseCore.cnx/cursor, while
# `cnx`/`cursor` below are bound to baseCore.cnx_/cursor_ (names are swapped
# relative to the attributes). Which database each pair targets is not visible
# in this file - confirm against BaseCore.
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor

# `cursor` is the handle passed to getTycIdByDB() in doJob().
cnx = baseCore.cnx_
cursor = baseCore.cursor_

# Not referenced anywhere else in the visible part of this file.
list_all_1 = []
list_all_2 = []


@retry(tries=3, delay=1)
def get_html(tycid, driver):
    """Open a company's Tianyancha detail page and read its shareholder count.

    The page is loaded through the Selenium ``driver`` and parsed with
    BeautifulSoup. The count is taken from the first ``<span>`` inside the
    ``dim-tab-root`` element of the ``data-dim="holder"`` section: the text
    following the '股东信息' or '最新公示' label (the latter wins when both
    are present).

    Because of the ``@retry`` decorator, an exception escaping this function
    (e.g. from ``driver.get``) triggers up to 3 attempts, 1 second apart.

    Args:
        tycid: Tianyancha company id used to build the detail-page URL.
        driver: Selenium-style driver exposing ``get`` and ``page_source``.

    Returns:
        int: the shareholder count shown on the page, or one of the sentinels
            ``-1`` - looking up the holder section raised,
            ``-2`` - the page has no ``data-dim="holder"`` section,
            ``0``  - the section exists but the count could not be parsed.
        None: the section header is neither '股东信息' nor '主要股东'.
    """
    url = f"https://www.tianyancha.com/company/{tycid}"
    driver.get(url=url)
    # Fixed pause before reading the DOM (presumably to let the page render).
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    try:
        div_part = soup.find('div', attrs={'data-dim': 'holder'})
    except Exception:
        return -1
    if div_part is None:
        return -2

    try:
        header_text = div_part.find('h3', class_='dimHeader_main-title-txt__GPoaZ').text
        if '股东信息' in header_text:
            log.info('股东信息')
        elif '主要股东' in header_text:  # otherwise it is the "main shareholders" view
            log.info('主要股东')
        else:
            # Unknown header: callers receive None, exactly as before.
            return None

        tab_text = div_part.find('div', class_='dim-tab-root').find('span').get_text()
        total = None
        # Checked in this order so that '最新公示' overrides '股东信息'
        # when the tab text contains both labels.
        for marker in ('股东信息', '最新公示'):
            if marker in tab_text:
                total = tab_text.split(marker)[1].replace(' ', '')
        if total is None:
            # Neither label present - no count to parse.
            return 0
        return int(total)
    except Exception as e:
        # Missing elements (AttributeError on None) or a non-numeric count
        # (ValueError) mean the count is unreadable; report 0 rather than fail.
        log.info(f'{tycid}===failed to parse shareholder count==={e}')
        return 0


@retry(tries=5, delay=3)
def get_page(url, s, headers):
    """GET a JSON endpoint and return its ``data.total`` value with the payload.

    Any exception raised here (network error, non-200 status, invalid JSON,
    missing ``data``/``total`` key) is left to propagate so that the
    ``@retry`` decorator re-invokes the call - up to 5 attempts, 3 seconds
    apart - before the last exception reaches the caller.

    Args:
        url: Fully built request URL.
        s: Session-like object exposing ``get`` (a ``requests`` session in doJob).
        headers: HTTP headers to send.

    Returns:
        tuple: ``(total, data_page)`` where ``total`` is
        ``data_page['data']['total']`` and ``data_page`` is the decoded JSON body.

    Raises:
        RuntimeError: the response status code is not 200.
        KeyError / TypeError: the JSON body lacks ``data.total``.
    """
    res = s.get(url=url, headers=headers, timeout=(5, 10))
    if res.status_code != 200:
        # Explicit error instead of a bare `raise` with no active exception;
        # the exception type (RuntimeError) is unchanged for callers.
        raise RuntimeError(f'unexpected HTTP status {res.status_code} for {url}')
    data_page = res.json()
    total_page_ = data_page['data']['total']
    return total_page_, data_page


@retry(tries=5, delay=3)
def get_page1(url, s, headers):
    """GET a JSON endpoint and return ``data.stockHolder.total`` with the payload.

    Any exception raised here (network error, non-200 status, invalid JSON,
    missing key) is left to propagate so that the ``@retry`` decorator
    re-invokes the call - up to 5 attempts, 3 seconds apart - before the last
    exception reaches the caller.

    Args:
        url: Fully built request URL.
        s: Session-like object exposing ``get`` (a ``requests`` session in doJob).
        headers: HTTP headers to send.

    Returns:
        tuple: ``(total, data_page)`` where ``total`` is
        ``data_page['data']['stockHolder']['total']`` and ``data_page`` is the
        decoded JSON body.

    Raises:
        RuntimeError: the response status code is not 200.
        KeyError / TypeError: the JSON body lacks ``data.stockHolder.total``.
    """
    res = s.get(url=url, headers=headers, timeout=(5, 10))
    if res.status_code != 200:
        # Explicit error instead of a bare `raise` with no active exception;
        # the exception type (RuntimeError) is unchanged for callers.
        raise RuntimeError(f'unexpected HTTP status {res.status_code} for {url}')
    data_page = res.json()
    total_page_ = data_page['data']['stockHolder']['total']
    return total_page_, data_page


@retry(tries=5, delay=3)
def post_page(url, s, headers, payload):
    """POST a JSON payload and return the response's ``data.total`` with the body.

    Any exception raised here (network error, non-200 status, invalid JSON,
    missing ``data``/``total`` key) is left to propagate so that the
    ``@retry`` decorator re-invokes the call - up to 5 attempts, 3 seconds
    apart - before the last exception reaches the caller.

    Args:
        url: Request URL.
        s: Session-like object exposing ``post`` (a ``requests`` session in doJob).
        headers: HTTP headers to send.
        payload: JSON-serialisable request body; sent via ``json.dumps``.

    Returns:
        tuple: ``(total, json_info)`` where ``total`` is
        ``json_info['data']['total']`` and ``json_info`` is the decoded JSON body.

    Raises:
        RuntimeError: the response status code is not 200.
        KeyError / TypeError: the JSON body lacks ``data.total``.
    """
    res = s.post(url=url, headers=headers, data=json.dumps(payload), timeout=(5, 10))
    if res.status_code != 200:
        # Explicit error instead of a bare `raise` with no active exception;
        # the exception type (RuntimeError) is unchanged for callers.
        raise RuntimeError(f'unexpected HTTP status {res.status_code} for {url}')
    json_info = res.json()
    total_page_ = json_info['data']['total']
    return total_page_, json_info


# Redis list this worker pops company codes from and re-queues failures onto.
_HOLDER_QUEUE_KEY = 'shareHolderEnterprise:gnqy_socialCode'
# Page size assumed by the paging arithmetic and sent with every paged request.
_HOLDER_PAGE_SIZE = 20

# "Latest announcement" endpoints (POST). The first page is probed on the
# `holder` path while later pages use the `holderV2` path, as in the original.
_ANNOUNCEMENT_PROBE_URL = 'https://capi.tianyancha.com/cloud-company-background/companyV2/dim/holder/latest/announcement'
_ANNOUNCEMENT_PAGED_URL = 'https://capi.tianyancha.com/cloud-company-background/companyV2/dim/holderV2/latest/announcement?'
# Listed-company endpoints (GET); placeholders are (gid, pageNum).
_TOPTEN_URL = 'https://capi.tianyancha.com/cloud-listed-company/listed/holder/topTen?&gid={}&pageSize=20&pageNum={}&percentLevel=-100&type=1'
_HK_URL = 'https://capi.tianyancha.com/cloud-listed-company/listed/holder/hk?date=&gid={}&sortField=&sortType=-100&pageSize=20&pageNum={}&percentLevel=-100&keyword='

# Output field -> response field, per endpoint flag (1: announcement,
# 3: topTen, 0: hk). The hk endpoint supplies no 'year' field.
_HOLDER_FIELDS = {
    1: (
        ('name', 'shareHolderName'),            # shareholder name
        ('shareHoldRation', 'percent'),         # shareholding ratio
        ('shareHoldNum', 'shareholdingNum'),    # number of shares held
        ('shareHoldUnit', 'shareholdingNumUnit'),  # unit of the share count
        ('shareType', 'shareType'),             # share type
        ('year', 'yearReport'),                 # publication year
    ),
    3: (
        ('name', 'name'),
        ('shareHoldRation', 'proportion'),
        ('shareHoldNum', 'holdingNum'),
        ('shareHoldUnit', 'shareUnit'),
        ('shareType', 'shareType'),
        ('year', 'publishDate'),
    ),
    0: (
        ('name', 'holder_name'),
        ('shareHoldRation', 'longHeldRatioWithUnit'),
        ('shareHoldNum', 'held_total_num_long_position'),
        ('shareHoldUnit', 'shareUnit'),
        ('shareType', 'shareTypeName'),
    ),
}


def _fetch_holder_page(s, headers, url, flag, tycid, page):
    """Fetch one shareholder page (page >= 2), trying up to 3 times.

    Returns ``(data_page, error_code)`` from the last response received, or
    ``(None, None)`` when every attempt failed at the network level.
    """
    data_page = None
    error_code = None
    for _ in range(3):
        try:
            if flag == 1:
                payload = {"gid": f"{tycid}", "pageSize": 20, "pageNum": f"{page}", "sortField": "",
                           "sortType": "-100", "historyType": 1}
                res = s.post(url=url, headers=headers, data=json.dumps(payload), timeout=(5, 10))
            else:
                res = s.get(url.format(tycid, page), headers=headers, timeout=(5, 10))  # ,verify=False
        except requests.exceptions.RequestException as e:
            log.info(e)
            time.sleep(1)
            continue
        try:
            data_page = res.json()
        finally:
            # Release the connection for every response, not just the last one.
            res.close()
        error_code = data_page['errorCode']
        if error_code == 0:
            break
    return data_page, error_code


def _extract_holder_list(data_page):
    """Return the shareholder rows from whichever key the endpoint uses."""
    try:
        return data_page['data']['holderList']
    except (KeyError, TypeError):
        pass
    try:
        return data_page['data']['result']
    except (KeyError, TypeError):
        return data_page['data']['stockHolder']['result']


def _build_holder_record(holder_info, flag, socialCreditCode, sort):
    """Map one API shareholder row onto the record shape sent downstream."""
    record = {'socialCreditCode': socialCreditCode}
    # Any flag other than 1 or 3 is treated as the hk endpoint (flag 0).
    for out_key, src_key in _HOLDER_FIELDS.get(flag, _HOLDER_FIELDS[0]):
        record[out_key] = holder_info[src_key]
    record['sort'] = sort
    return record


def doJob():
    """Run the shareholder collection loop forever.

    Each iteration logs in via ``login.login``, pops one
    ``socialCreditCode|...`` entry from the Redis list
    ``shareHolderEnterprise:gnqy_socialCode``, resolves its Tianyancha id,
    reads the shareholder count from the detail page (``get_html``), probes
    three API endpoints and pages through the one whose total matches that
    count (falling back to the hk endpoint). The collected records are sent
    with ``sendData``.

    A company whose detail page or API data cannot be fetched is pushed back
    onto the same Redis list and nothing is sent for it. When the list is
    empty the loop sleeps for one hour. The function never returns.
    """
    driver = edge.create_driver()
    driver.get('https://www.tianyancha.com/')
    driver.maximize_window()
    while True:
        # todo: wire up cookie usage
        headers = {
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Content-Type': 'application/json',
            'Connection': 'keep-alive',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'version': 'TYC-Web'
        }
        driver, id_cookie, s, update_headers = login.login(driver)
        if not id_cookie:
            continue
        headers.update(update_headers)

        info = baseCore.r.blpop([_HOLDER_QUEUE_KEY], 2)
        if not info:
            log.info('数据已全部采集完')
            time.sleep(60 * 60)
            continue
        info = info[1].decode()
        start = time.time()
        socialCreditCode = info.split('|')[0]
        dics = []
        log.info(f'开始采集统一社会信用代码为{socialCreditCode}的企业信息')
        # Look up the company's Tianyancha id for the code taken from Redis.
        try:
            tycid = getTycIdByDB(socialCreditCode, cursor, start, info, s)
            if not tycid:
                continue

            try:
                charge = get_html(tycid, driver)
            except Exception:
                charge = -1
            if charge == -1:
                token.updateTokeen(id_cookie, 2)
                time.sleep(3)
                log.info(f'{socialCreditCode}==={tycid}===详情页获取失败')
                baseCore.r.rpush(_HOLDER_QUEUE_KEY, info)
                continue
            if charge == -2:
                # This company has no shareholder section; do not re-queue.
                token.updateTokeen(id_cookie, 2)
                log.info(f'{socialCreditCode}==={tycid}===没有股东信息')
                continue

            # Probe page 1 of each endpoint. The probes use the same page size
            # (20) as later pages because the probe response is reused as
            # page 1; a smaller probe size would skip rows 11-20.
            payload = {"gid": f"{tycid}", "pageSize": 20, "pageNum": 1, "sortField": "", "sortType": "-100", "historyType": 1}
            try:
                total_page2, data_page2 = post_page(_ANNOUNCEMENT_PROBE_URL, s, headers, payload)
            except Exception:
                total_page2, data_page2 = 0, {}
            time.sleep(1)
            try:
                total_page3, data_page3 = get_page(_TOPTEN_URL.format(tycid, 1), s, headers)
            except Exception:
                total_page3, data_page3 = 0, {}
            try:
                total_page1, data_page1 = get_page1(_HK_URL.format(tycid, 1), s, headers)
            except Exception:
                total_page1, data_page1 = 0, {}

            # Use the endpoint whose total agrees with the count on the page.
            if total_page2 == charge:
                url, total_page, data_page_one, flag = _ANNOUNCEMENT_PAGED_URL, total_page2, data_page2, 1
            elif total_page3 == charge:
                url, total_page, data_page_one, flag = _TOPTEN_URL, total_page3, data_page3, 3
            else:
                url, total_page, data_page_one, flag = _HK_URL, total_page1, data_page1, 0

            if total_page == 0:
                token.updateTokeen(id_cookie, 3)
                # Push the entry back onto the Redis queue.
                log.info(f'{socialCreditCode}==={tycid}===接口数据获取失败')
                baseCore.r.rpush(_HOLDER_QUEUE_KEY, info)
                continue

            # todo: derive the page count
            log.info(f'总数为{total_page}')
            # Exclusive upper bound for range(): ceil(total / page size) + 1.
            maxpage = (total_page + _HOLDER_PAGE_SIZE - 1) // _HOLDER_PAGE_SIZE + 1
            page_failed = False
            for page in range(1, maxpage):
                if page == 1:
                    data_page = data_page_one
                    errorCode = data_page['errorCode']
                else:
                    data_page, errorCode = _fetch_holder_page(s, headers, url, flag, tycid, page)
                if errorCode != 0:
                    token.updateTokeen(id_cookie, 3)
                    # Push the entry back onto the Redis queue and give up on
                    # this company so that no partial result is sent.
                    baseCore.r.rpush(_HOLDER_QUEUE_KEY, info)
                    log.info(f'{socialCreditCode}==={tycid}===接口数据获取失败')
                    page_failed = True
                    break

                list_all = _extract_holder_list(data_page)
                log.info(f'----flag:{flag}----')
                log.info(f'-----list_all:{len(list_all)}----')
                # `sort` is the 1-based position across all pages.
                first_sort = (page - 1) * _HOLDER_PAGE_SIZE + 1
                for sort, holder_info in enumerate(list_all, start=first_sort):
                    dics.append(_build_holder_record(holder_info, flag, socialCreditCode, sort))
                token.updateTokeen(id_cookie, 3)
                time.sleep(5)
            if page_failed:
                continue

            try:
                req = sendData('http://114.115.236.206:8088/sync/shareHolder', dics)
                log.info('数据发送成功')
                takeTime = baseCore.getTimeCost(start, time.time())
                log.info(f'{socialCreditCode}==={req.text}===耗时{takeTime}')
            except Exception as e:
                log.error(f'数据发送结果口失败==={e}')

        except Exception as e:
            token.updateTokeen(id_cookie, 3)
            log.info(f'==={socialCreditCode}=====企业股东采集失败===重新放入redis====')
            log.info(e)
            # Push the entry back onto the queue this worker reads from
            # (previously it went to the unrelated ChangeRecordEnterprise key).
            baseCore.rePutIntoR(_HOLDER_QUEUE_KEY, info)
            time.sleep(5)

        # break


# Start the collection loop only when this file is executed as a script.
if __name__ == "__main__":
    doJob()
