"""
采集上市信息企业信用代码，跟数据库对比
"""

import json
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import urllib3

# Silence urllib3's InsecureRequestWarning; the HTTP requests further down in
# this script are issued with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

from base.BaseCore import BaseCore
# Shared project helper; this script uses it for the Redis queue, logging,
# run-log recording and the database handles below.
baseCore = BaseCore()
# Connection and cursor exposed by BaseCore; the script calls
# cursor_.execute(...) and cnx_.commit() on them.
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor

# Task label passed to baseCore.recordLog when a failure is recorded.
taskType = '上市信息/东方财富网'

log = baseCore.getLogger()
# Codes whose listing status could not be parsed.
error_list = []
# One dict per successfully collected company.
list_all_info = []

def createDriver():
    """Start a Chrome WebDriver session using the locally installed binaries.

    The chromedriver and Chrome executable locations are hard-coded Windows
    paths.

    Returns:
        The started ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()`` when done.
    """
    chrome_driver = r'D:\cmd100\chromedriver.exe'
    service = Service(chrome_driver)
    chrome_options = webdriver.ChromeOptions()
    chrome_options.binary_location = r'D:\Google\Chrome\Application\chrome.exe'
    # To route traffic through a proxy, add for example:
    #   chrome_options.add_argument('--proxy-server=http://127.0.0.1:8080')
    # `options=` is the supported keyword alongside `service=`; the legacy
    # `chrome_options=` keyword is deprecated and rejected by newer Selenium 4
    # releases.
    return webdriver.Chrome(service=service, options=chrome_options)
# Main collection loop.
# Input: stock codes pulled from the 'gpdm:info' Redis queue.
# Output: credit code, listing flag and share category written to table gpdm1.

# First character of a stock code -> exchange prefix used in Eastmoney URLs
# (0/2/3 = Shenzhen, 6/9 = Shanghai, 4/8 = Beijing).
_EXCHANGE_PREFIXES = {
    '0': 'sz', '2': 'sz', '3': 'sz',
    '6': 'sh', '9': 'sh',
    '4': 'bj', '8': 'bj',
}

# Marker substring of SECURITY_TYPE -> category code
# (1 = A share, 2 = B share, 3 = New Third Board, 4 = H share).
# Order matters: when several markers occur, the last match wins.
_CATEGORY_MARKERS = (('A', '1'), ('B', '2'), ('新三板', '3'), ('H', '4'))


def _listing_tag(status_text):
    """Map the status text of the quote page to the listing flag.

    Returns 2 for "not listed", 0 for "delisted" and 1 for anything else.
    """
    status_text = status_text.strip()
    if status_text == '未上市':
        return 2
    if status_text == '已退市':
        return 0
    return 1


def _category_of(security_type):
    """Return the category code for a SECURITY_TYPE string, or '' if unknown."""
    matched = ''
    for marker, code in _CATEGORY_MARKERS:
        if marker in security_type:
            matched = code
    return matched


def _record_failure(code, started_at, message):
    """Remember a failed code and write a failure entry to the run log."""
    error_list.append(code)
    state = 0
    takeTime = baseCore.getTimeCost(started_at, time.time())
    baseCore.recordLog('', taskType, state, takeTime, '', message)
    print('error')


# Loop-invariant setting, so it is applied once instead of on every iteration.
requests.adapters.DEFAULT_RETRIES = 5

while True:
    start = time.time()
    com_code1 = baseCore.redicPullData('gpdm:info')
    if not com_code1:
        log.info('已没有数据')
        break

    # Codes with an unrecognised first character (including 'A') have no
    # exchange prefix; skip them up front instead of reusing a stale prefix
    # from the previous iteration or loading a bogus URL.
    exchange = _EXCHANGE_PREFIXES.get(com_code1[0])
    if exchange is None:
        log.info(f'={com_code1}===不支持的股票代码前缀=====')
        _record_failure(com_code1, start, f'{com_code1}不支持的股票代码前缀')
        continue
    com_code = exchange + com_code1

    log.info(f'======开始采集{com_code1}======')
    if exchange == 'bj':
        url = f'https://quote.eastmoney.com/bj/{com_code1}.html'
    else:
        url = f'https://quote.eastmoney.com/{com_code}.html'

    url_1 = f'https://emweb.eastmoney.com/PC_HSF10/CompanySurvey/PageAjax?code={com_code}'
    url_2 = f'https://emweb.eastmoney.com/PC_HSF10/BusinessAnalysis/PageAjax?code={com_code}'

    # A new browser is started per code; always quit it so Chrome processes
    # do not pile up over a long run.
    browser = createDriver()
    try:
        browser.get(url)
        time.sleep(8)  # fixed wait before reading the rendered page
        page_source = browser.page_source
    finally:
        browser.quit()

    soup_t = BeautifulSoup(page_source, 'html.parser')
    try:
        result = soup_t.find('div', class_='quote_quotenums').text
    except AttributeError as e:
        # find() returned None: the status element is missing from the page.
        log.info(f'={com_code}===解析上市状态失败=====')
        _record_failure(com_code, start, f'{com_code}解析上市状态失败--e:{e}')
        continue
    tag = _listing_tag(result)

    try:
        json_1 = requests.get(url_1, verify=False, timeout=30).json()
        json_2 = requests.get(url_2, verify=False, timeout=30).json()
        zhengquan_type = json_1['jbzl'][0]['SECURITY_TYPE']
        id_code = json_1['jbzl'][0]['REG_NUM']
        # NOTE(review): the business scope is fetched but not stored anywhere
        # in the code visible here.
        zhuyingfanwei = json_2['zyfw'][0]['BUSINESS_SCOPE']
        category = _category_of(zhengquan_type)
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # Network failure, non-JSON body or unexpected payload shape: record
        # it and move on instead of aborting the whole run.
        log.info(f'={com_code}===获取企业基本信息失败=====')
        _record_failure(com_code, start, f'{com_code}获取企业基本信息失败--e:{e}')
        continue

    if not category:
        log.info(f'={com_code}===未识别的证券类型:{zhengquan_type}=====')

    dic_cwsj = {
        "category": category,  # share type (1 = A, 2 = B, 3 = New Third Board, 4 = H)
        'listed': tag,
        "securitiesCode": com_code[2:],
        "securitiesType": zhengquan_type,
        "socialCreditCode": id_code,
    }

    list_all_info.append(dic_cwsj)
    log.info(f'======{com_code}====采集成功=====')

    # Parameterised query: id_code comes from a remote response and must not
    # be interpolated into the SQL text.
    # NOTE(review): assumes the driver behind BaseCore uses the '%s'
    # placeholder style (e.g. PyMySQL / mysql-connector) - confirm.
    updateSql = "update gpdm1 set xydm = %s,tag = %s,category = %s where gpdm = %s"
    cursor_.execute(updateSql, (id_code, str(tag), category, com_code1))
    cnx_.commit()
