# -*- coding: utf-8 -*-
import pandas as pd

import time

import requests
import json

from kafka import KafkaProducer
from BaseCore import BaseCore
from getQccId import find_id_by_name

# Shared project helper object. The rest of this file uses it for the database
# connection/cursor, the logger, the Redis-backed work queue and token handling.
baseCore = BaseCore()
cnx_ = baseCore.cnx          # DB connection; used below for commit()
cursor_ = baseCore.cursor    # DB cursor; used below for execute()
log = baseCore.getLogger()   # logger used for all progress messages

# 通过企查查id获取企业基本信息
def _pick(source, *path, default=''):
    """Follow ``path`` (dict keys / list indexes) through ``source``.

    Returns ``default`` when a step is missing, a container has the wrong
    type, or the value found is ``None``.
    """
    node = source
    for step in path:
        try:
            node = node[step]
        except (LookupError, TypeError):
            return default
    return default if node is None else node


def _former_names(company):
    """Return all ``OriginalName[*]['Name']`` values joined by single spaces.

    Any malformed entry yields ``''`` for the whole field.
    """
    entries = _pick(company, 'OriginalName', default=())
    try:
        return ' '.join(entry['Name'] for entry in entries).strip()
    except (LookupError, TypeError):
        return ''


def _insured_count(company):
    """Return the ``Value`` of the last '参保人数' entry in ``CommonList`` ('' if none)."""
    entries = _pick(company, 'CommonList', default=())
    if not isinstance(entries, (list, tuple)):
        return ''
    insured = ''
    for entry in entries:
        if _pick(entry, 'KeyDesc') == '参保人数':
            insured = _pick(entry, 'Value', default=insured)
    # NOTE(review): the previous code blanked a '0' count, but did so before
    # this lookup ran, so it never had any effect. Confirm whether '0' should
    # be blanked here.
    return insured


def _fetch_qcc_json(api_path, com_id):
    """GET one Qichacha mini-program endpoint and return the decoded JSON body.

    Reads the module-level ``token`` and stamps the module-level ``headers``
    dict with the request timestamp before sending.
    """
    stamp = str(int(time.time()) * 1000)
    headers['Qcc-Timestamp'] = stamp
    url = "https://xcx.qcc.com/mp-weixin/forwardApp/{}?token={}&t={}&unique={}".format(
        api_path, token, stamp, com_id)
    # NOTE(review): verify=False disables TLS certificate verification.
    return requests.get(url=url, headers=headers, verify=False).json()


def info_by_id(com_id, com_name):
    """Fetch a company's basic information from Qichacha by its Qichacha id.

    Two endpoints are queried: ``v1/ent/detail`` for the core record and
    ``v6/base/getEntDetail`` for the fields the first endpoint no longer
    returns. Every optional field that is missing or ``None`` becomes ``''``.

    Relies on the module-level ``headers``, ``token``, ``baseCore`` and ``log``.

    Args:
        com_id: Qichacha company id, sent as the ``unique`` query parameter.
        com_name: Company name as queued; used for logging and for re-queueing
            the company when the first response carries no company record.

    Returns:
        A one-element list holding the field dict, or an empty list when the
        first response has no ``result.Company`` (the name is then pushed back
        onto the ``hundred:baseinfo`` queue).

    Raises:
        KeyError / TypeError: if the company record has no ``Name``.
        Exception: anything raised by ``requests`` or by JSON decoding.
    """
    resp_dict = _fetch_qcc_json('v1/ent/detail', com_id)
    time.sleep(2)

    try:
        result_dict = resp_dict['result']['Company']
    except (LookupError, TypeError):
        log.info(com_name + ":获取失败===========重新放入redis")
        baseCore.rePutIntoR('hundred:baseinfo', com_name)
        return []

    # The name is the only mandatory field; everything else degrades to ''.
    company_name = result_dict['Name']
    credit_code = _pick(result_dict, 'CreditCode')

    legal_person = _pick(result_dict, 'Oper', 'Name')
    if baseCore.str_have_num(legal_person):
        legal_person = ''

    province = _pick(result_dict, 'Area', 'Province')
    city = _pick(result_dict, 'Area', 'City')
    county = _pick(result_dict, 'Area', 'County')
    try:
        region = province + city + county
    except TypeError:
        region = ''

    # Prefer the extended-info website; fall back to the first contact URL.
    website = _pick(result_dict, 'companyExtendInfo', 'WebSite', default=None)
    if website is None:
        website = _pick(result_dict, 'ContactInfo', 'WebSite', 0, 'Url')

    company_name = baseCore.hant_2_hans(company_name)

    resp_dict2 = _fetch_qcc_json('v6/base/getEntDetail', com_id)
    time.sleep(1)
    com2 = _pick(resp_dict2, 'result', 'Company', default={})

    aa_dict = {
        'qccId': com_id,  # Qichacha company id
        'name': company_name,  # company name
        'shortName': '',  # short name (not provided by these endpoints)
        'socialCreditCode': credit_code,  # unified social credit code
        'legalPerson': legal_person,  # legal representative
        'officialPhone': _pick(result_dict, 'companyExtendInfo', 'Tel'),  # phone
        'officialUrl': website,  # official website
        'officialEmail': _pick(result_dict, 'companyExtendInfo', 'Email'),  # e-mail
        'briefInfo': _pick(result_dict, 'companyExtendInfo', 'Desc'),  # description
        'registerStatus': _pick(result_dict, 'ShortStatus'),  # registration status
        'incorporationDate': _pick(result_dict, 'StartDate'),  # date of establishment
        'capital': _pick(result_dict, 'RegistCapi'),  # registered capital
        'paidCapital': _pick(com2, 'RecCap'),  # paid-in capital
        'approvalDate': _pick(com2, 'CheckDate'),  # approval date
        'organizationCode': _pick(com2, 'OrgNo'),  # organization code
        'registerNo': _pick(com2, 'No'),  # business registration number
        'taxpayerNo': credit_code,  # taxpayer id (filled with the credit code)
        'type': _pick(result_dict, 'EconKind'),  # company type
        'businessStartDate': _pick(com2, 'TermStart'),  # business term start
        'businessEndDate': _pick(com2, 'TeamEnd'),  # business term end
        'taxpayerQualification': _pick(com2, 'TaxpayerType'),  # taxpayer qualification
        'industry': _pick(com2, 'IndustryArray', -1),  # industry (most specific level)
        'region': region,
        'province': province,  # province
        'city': city,  # city
        'county': county,  # county
        'registerDepartment': _pick(com2, 'BelongOrg'),  # registration authority
        'scale': _pick(result_dict, 'companyExtendInfo', 'Info'),  # staff size
        'insured': _insured_count(com2),  # number of insured employees
        'beforeName': _former_names(result_dict),  # former names
        'englishName': _pick(com2, 'EnglishName'),  # English name
        'importExportEnterpriseCode': _pick(com2, 'IxCode'),  # import/export code
        'address': _pick(result_dict, 'Address'),  # address
        'businessRange': _pick(com2, 'Scope'),  # business scope
        'status': 0,  # status
    }

    log.info(company_name + "：爬取完成")
    return [aa_dict]


if __name__ == '__main__':
    # Crawl loop: pull company names from the 'hundred:baseinfo' queue, resolve
    # each one to a Qichacha id, fetch its basic info and publish it to Kafka.
    taskType = '基本信息/企查查/单项双百企业冠军'
    # Kept at module level on purpose: info_by_id() reads `headers` and `token`
    # as globals.
    headers = {
        'Host': 'xcx.qcc.com',
        'Connection': 'keep-alive',
        'Qcc-Platform': 'mp-weixin',
        'Qcc-Timestamp': '',
        'Qcc-Version': '1.0.0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
        'content-type': 'application/json',
        'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
        'Accept-Encoding': 'gzip, deflate, br,'
    }
    list_weicha = []  # notes about companies whose crawl failed
    name_list = []    # queued name vs. name returned by Qichacha
    while True:
        # TODO: tokens expire after roughly two hours and must be re-captured;
        # the current token is read from the database.
        token = baseCore.GetToken()
        if not token:
            log.info('==========已无token==========')
            time.sleep(30)
            continue
        start_time = time.time()
        # Next company to process.
        com_name = baseCore.redicPullData('hundred:baseinfo')
        if com_name == '' or com_name is None:
            time.sleep(20)
            continue
        dic_info = baseCore.getInfomation(com_name)
        log.info(f'----当前企业{com_name}--开始处理---')

        social_code = dic_info[5]
        # Qichacha id already stored for this company, if any.
        company_id = dic_info[6]
        if company_id is None:
            # Search by credit code when we have one, otherwise by name.
            if social_code:
                company_id = find_id_by_name(start_time,token,social_code)
            else:
                company_id = find_id_by_name(start_time,token,com_name)
            if company_id == 'null':
                log.info('=====搜索不到该企业====')
                # TODO: companies that cannot be found have no credit code and
                # cannot be forwarded; generate a credit code for them.
                # NOTE(review): this pushes a suffixed name back onto the same
                # queue this loop reads from - confirm that is intended.
                baseCore.rePutIntoR('hundred:baseinfo', com_name + '：搜索不到')
                continue
            if not company_id:
                log.info(com_name + "：企业ID获取失败===重新放入redis")
                list_weicha.append(com_name + "：企业ID获取失败")
                baseCore.rePutIntoR('hundred:baseinfo',com_name)
                baseCore.delete_token(token)
                log.info('=====已重新放入redis,失效token已删除======')
                time.sleep(20)
                continue
            log.info(f'====={com_name}===={company_id}=====获取企业id成功=====')
            # Persist the resolved id.
            # NOTE(review): SQL is built by string interpolation; switch to a
            # parameterized query once the driver's paramstyle is confirmed.
            updateqccid = f"update Hundred set qccid = '{company_id}' where CompanyName = '{com_name}'"
            cursor_.execute(updateqccid)
            cnx_.commit()
        try:
            post_data_list = info_by_id(company_id, com_name)
        except Exception:
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still stop
            # the script instead of being treated as a dead token.
            log.info(f'====={social_code}=====获取基本信息失败，重新放入redis=====')
            # Same key the loop pulls from; the previous 'hundred:baseInfo'
            # spelling sent the company to a list nothing reads.
            baseCore.rePutIntoR('hundred:baseinfo', com_name)
            baseCore.delete_token(token)
            log.info('=====已重新放入redis,失效token已删除======')
            continue
        if not post_data_list:
            # info_by_id already re-queued the company; back off before retrying.
            time.sleep(20)
            continue
        for post_data in post_data_list:
            if post_data is None:
                print(com_name + "：企业信息获取失败")
                list_weicha.append(com_name + "：企业信息获取失败")
                continue
            get_name = post_data['name']
            get_socialcode = post_data['socialCreditCode']
            # Store the credit code returned by Qichacha.
            # NOTE(review): string-built SQL, see the note on the qccid update.
            updatesocialcode = f"update Hundred set SocialCode = '{get_socialcode}' where CompanyName = '{com_name}'"
            cursor_.execute(updatesocialcode)
            cnx_.commit()
            name_compile = {
                'yuan_name':com_name,
                'get_name':get_name
            }
            name_list.append(name_compile)

            log.info(f'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}')
            producer = None
            try:
                producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'], api_version=(2, 0, 2))
                kafka_result = producer.send("regionInfo", json.dumps(post_data, ensure_ascii=False).encode('utf8'))
                print(kafka_result.get(timeout=10))
            except Exception:
                exception = 'kafka传输失败'
                state = 0
                takeTime = baseCore.getTimeCost(start_time, time.time())
                baseCore.recordLog(get_socialcode, taskType, state, takeTime, '', exception)
                log.info(f"{get_name}--{get_socialcode}--kafka传输失败")
            finally:
                # A producer is created per record; release it so the loop does
                # not accumulate open producers.
                if producer is not None:
                    producer.close(timeout=10)
    # NOTE(review): the loop above has no `break`, so the export below is
    # currently unreachable; it only runs if a `break` is added to the loop.
    nowtime = baseCore.getNowTime(1).replace('-','_')[:10]
    companyName = pd.DataFrame(name_list)
    companyName.to_excel(f'./data/企业名称对比_{nowtime}.xlsx',index=False)
    false_com = pd.DataFrame(list_weicha)
    false_com.to_excel(f'./data/采集失败企业名单_{nowtime}.xlsx',index=False)






