独角兽榜单基本信息

cdc4a715 · 薛凌堃 · 41c6aaa2 · cdc4a715
--- a/comData/BaseInfo_qcc/fbsbaseinfo.py
+++ b/comData/BaseInfo_qcc/fbsbaseinfo.py
@@ -9,19 +9,14 @@ import json
 from kafka import KafkaProducer
 from base.BaseCore import BaseCore
 from getQccId import find_id_by_name
-from base.BaseCore import BaseCore
+
 baseCore = BaseCore()
 cnx_ = baseCore.cnx
 cursor_ = baseCore.cursor
-baseCore = BaseCore()
-
-cnx = baseCore.cnx
-cursor = baseCore.cursor
 log = baseCore.getLogger()

 # 通过企查查id获取企业基本信息
-def info_by_id(com_id,com_name):
-
+def info_by_id(com_id,com_name,social_code):
    aa_dict_list = []

    t = str(int(time.time()) * 1000)
@@ -29,14 +24,17 @@ def info_by_id(com_id,com_name):

    url = "https://xcx.qcc.com/mp-weixin/forwardApp/v1/ent/detail?token={}&t={}&unique={}".format(token, t, com_id)
    resp_dict = requests.get(url=url, headers=headers, verify=False).json()
+    log.info(resp_dict)
    time.sleep(2)

    com_jc_name = ''
    try:
        result_dict = resp_dict['result']['Company']
    except:
-        print(com_name + ":获取失败")
-    #
+        log.info(com_name + ":获取失败===========重新放入redis")
+        baseCore.rePutIntoR('dujs_1020:baseinfo_socialcode',social_code)
+        return aa_dict_list
+
    company_name = result_dict['Name']
    CreditCode = result_dict['CreditCode']
    if CreditCode is None:
@@ -309,11 +307,12 @@ def info_by_id(com_id,com_name):
    }

    aa_dict_list.append(aa_dict)
-    print(company_name + "：爬取完成")
+    log.info(company_name + "：爬取完成")
    return aa_dict_list

+
 if __name__ == '__main__':
-    taskType = '基本信息/企查查/福布斯'
+    taskType = '基本信息/企查查/单项双百企业冠军'
    headers = {
        'Host': 'xcx.qcc.com',
        'Connection': 'keep-alive',
@@ -325,54 +324,73 @@ if __name__ == '__main__':
        'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
        'Accept-Encoding': 'gzip, deflate, br,'
    }
-
+    list_weicha = []
+    name_list = []
    #从redis里拿数据
    while True:
+        # TODO:需要隔两个小时左右抓包修改,token从数据库中获得
        token = baseCore.GetToken()
-        list_weicha = []
-        list_all_info = []
-        name_list = []
+        dataList = []
+        if token:
+            pass
+        else:
+            log.info('==========已无token==========')
+            time.sleep(30)
+            continue
+        # list_all_info = []
        start_time = time.time()
        # 获取企业信息
-        social_code = baseCore.redicPullData('BaseInfoEnterpriseFbs:gnqy_social_code')
-        # social_code = '91110000710924945A'
-        if social_code is None:
+        # social_code = baseCore.redicPullData('dujs_1020:baseinfo_socialcode')
+        social_code = '91310115067758342E'
+        if social_code == '' or social_code is None:
            time.sleep(20)
            continue
-        log.info(f'----当前企业{social_code}-----')
        dic_info = baseCore.getInfomation(social_code)
-        #
-        count = dic_info[13]
+        log.info(f'----当前企业{social_code}--开始处理---')
+
+        count = dic_info[14]
        com_name = dic_info[1]
        social_code = dic_info[2]
        # 企查查id
        company_id = dic_info[12]
-        # 如果没有信用代码 就通过名字搜索 如果有信用代码 就通过信用代码
+        #如果没有信用代码 就通过名字搜索 如果有信用代码 就通过信用代码
        if company_id == None:
            if social_code:
-                company_id = find_id_by_name(start_time, token, social_code)
+                company_id = find_id_by_name(start_time,token,social_code)
            else:
-                company_id = find_id_by_name(start_time, token, com_name)
-            # todo:写入数据库
-            updateSql = f"update EnterpriseInfo set QCCID = '{company_id}' where SocialCode = '{social_code}'"
-            cursor_.execute(updateSql)
-            cnx_.commit()
-            post_data_list = info_by_id(company_id, com_name)
-            if company_id == "":
-                print(com_name + "：企业ID获取失败")
-                list_weicha.append(com_name + "：企业ID获取失败")
+                company_id = find_id_by_name(start_time,token,com_name)
+            if company_id == 'null':
+                log.info('=====搜索不到该企业====')
+                #todo:搜不到的企业没有信用代码 传输不过去 生成一个信用代码
+                baseCore.rePutIntoR('dujs_1020:baseinfo_socialcode', social_code + '：搜索不到')
                continue
-        else:
-            log.info(f'====={social_code}===={company_id}=====获取企业id成功=====')
-            try:
-                post_data_list = info_by_id(company_id, com_name)
-            except:
-                log.info(f'====={social_code}=====获取基本信息失败，重新放入redis=====')
-                baseCore.rePutIntoR('BaseInfoEnterpriseFbs:gnqy_social_code', social_code)
+            if not company_id:
+                log.info(social_code + "：企业ID获取失败===重新放入redis")
+                list_weicha.append(social_code + "：企业ID获取失败")
+                baseCore.rePutIntoR('dujs_1020:baseinfo_socialcode',social_code)
+                baseCore.delete_token(token)
+                log.info('=====已重新放入redis,失效token已删除======')
+                time.sleep(20)
                continue
-
+            else:
+                log.info(f'====={com_name}===={company_id}=====获取企业id成功=====')
+                # todo:写入数据库
+                updateSql = f"update EnterpriseInfo set QCCID = '{company_id}' where SocialCode = '{social_code}'"
+                cursor_.execute(updateSql)
+                cnx_.commit()
+        try:
+            post_data_list = info_by_id(company_id, com_name, social_code)
+        except:
+            log.info(f'====={social_code}=====获取基本信息失败，重新放入redis=====')
+            baseCore.rePutIntoR('dujs_1020:baseinfo_socialcode', social_code)
+            continue
+        if post_data_list:
+            pass
+        else:
+            log.info(f'======{social_code}====企查查token失效====')
+            time.sleep(20)
+            continue
        for post_data in post_data_list:
-            list_all_info.append(post_data)
            if post_data is None:
                print(com_name + "：企业信息获取失败")
                list_weicha.append(com_name + "：企业信息获取失败")
@@ -396,17 +414,18 @@ if __name__ == '__main__':
                takeTime = baseCore.getTimeCost(start_time, time.time())
                baseCore.recordLog(get_socialcode, taskType, state, takeTime, '', exception)
                log.info(f"{get_name}--{get_socialcode}--kafka传输失败")
-        # break
+
        # 信息采集完成后将该企业的采集次数更新
        runType = 'BaseInfoRunCount'
        count += 1
        baseCore.updateRun(social_code, runType, count)
-    nowtime = baseCore.getNowTime(1).replace('-', '_')[:10]
+        break
+    nowtime = baseCore.getNowTime(1).replace('-','_')[:10]
    companyName = pd.DataFrame(name_list)
-    companyName.to_excel(f'./data/企业名称对比_{nowtime}.xlsx', index=False)
+    companyName.to_excel(f'./data/企业名称对比_{nowtime}.xlsx',index=False)
    false_com = pd.DataFrame(list_weicha)
-    false_com.to_excel(f'./data/采集失败企业名单_{nowtime}.xlsx', index=False)
-    baseCore.close()
+    false_com.to_excel(f'./data/采集失败企业名单_{nowtime}.xlsx',index=False)
+