import re
import time
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pymongo
import requests
from retry import retry

from base import BaseCore
# SECURITY NOTE(review): the MongoDB host and credentials are hard-coded in
# source; consider loading them from configuration or the environment.
# One MongoClient is created and shared by both collection handles: every
# MongoClient owns its own connection pool, so one per process is sufficient.
_mongo_client = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='zzsn@9988')
# Collection that receives the per-date market-overview records (see doJob).
db_storage = _mongo_client.研究中心['REITs市场概况-深圳']
# Collection that receives the fund-list records (see getData).
db_storage_ = _mongo_client.研究中心['REITs基金列表']
# Project helper object; in this file it supplies the logger, proxies and
# the elapsed-time formatter.
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
# Request headers passed to every requests.get() call in this file. They
# present the client as a desktop Edge 119 browser whose request originates
# from newmedia.szse.cn (Origin/Referer) and targets reits.szse.cn (Host).
# NOTE(review): 'Accept-Encoding' advertises 'br'; presumably the runtime is
# able to decode Brotli-compressed responses — confirm.
headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Host': 'reits.szse.cn',
    'Origin': 'https://newmedia.szse.cn',
    'Pragma': 'no-cache',
    'Referer': 'https://newmedia.szse.cn/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
    'sec-ch-ua': '"Microsoft Edge";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}


def _first_capture(pattern, text):
    """Return the first capture group of *pattern* found in *text*, stripped.

    When the pattern does not match, *text* itself is returned (stripped);
    a non-string *text* is returned unchanged. This keeps a plain-text field
    usable instead of raising when the expected markup is absent.
    """
    if not isinstance(text, str):
        return text
    found = re.search(pattern, text)
    return (found.group(1) if found else text).strip()


def getData():
    """Fetch the SZSE REITs fund list, store each fund in MongoDB and return it.

    Each fund of the ``reits_fund_list`` report is inserted into
    ``db_storage_`` as one document (with a one-second pause after every
    insert) and also collected into the returned table.

    NOTE(review): only PAGENO=1 with PAGESIZE=10 is requested, so presumably
    at most ten funds are retrieved — confirm whether pagination is needed.

    Returns:
        pandas.DataFrame: one row per fund with the columns 基金简称, 基金代码,
        当前规模(万份), 流通规模(万份), 基金管理人, 投资类别, 基金类别, 上市日期.
        The frame is empty (but still has these columns) when the report
        contains no rows.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    columns = ['基金简称', '基金代码', '当前规模(万份)', '流通规模(万份)', '基金管理人', '投资类别', '基金类别', '上市日期']
    ip = baseCore.get_proxy()
    url = 'https://reits.szse.cn/api/report/ShowReport/data?SHOWTYPE=JSON&CATALOGID=reits_fund_list&PAGENO=1&PAGESIZE=10'
    # A timeout prevents a stalled proxy/server from hanging the job forever.
    req = requests.get(url, headers=headers, proxies=ip, timeout=30)
    req.encoding = req.apparent_encoding
    data_json = req.json()[0]['data']

    data_list = []
    for data_ in data_json:
        row = [
            # Fund short name is wrapped in <u>…</u> markup.
            _first_capture('<u>(.*?)</u>', data_['jjjcurl']),
            data_['sys_key'].strip(),
            data_['dqgm'].strip(),
            data_['ltgm'].strip(),
            # Manager name is the text of an <a …'>…</a> link when present.
            _first_capture(r"'>(.*?)</a>", data_['glrmc']),
            data_['tzlb'].strip(),
            data_['jjlb'].strip(),
            # Keep only the leading YYYY-MM-DD part of the listing date.
            data_['ssrq'].strip()[:10],
        ]
        # A fresh dict per row: insert_one() adds an '_id' key to the
        # document it is given.
        db_storage_.insert_one(dict(zip(columns, row)))
        time.sleep(1)
        data_list.append(row)
    # Passing columns= directly also works when no rows were returned.
    return pd.DataFrame(data_list, columns=columns)

@retry(tries=5,delay=10)
def getDataJson(date):
    """Fetch the ``reits_scgk_oa`` (market overview) report rows for one date.

    The call is wrapped by ``@retry(tries=5, delay=10)``, so a failing
    attempt is retried according to that decorator's settings.

    Args:
        date: the query date. A ``datetime``/``date`` object is sent as
            ``YYYY-MM-DD`` (without a time-of-day part); a string is sent
            unchanged.

    Returns:
        The ``data`` member of the first element of the JSON response.

    Raises:
        requests.RequestException: on network failure or timeout (after the
            retries are exhausted).
    """
    # Formatting a datetime directly into the URL would append " 00:00:00";
    # send only the calendar date.
    query_date = date.strftime('%Y-%m-%d') if hasattr(date, 'strftime') else date
    url = f'https://reits.szse.cn/api/report/ShowReport/data?SHOWTYPE=JSON&CATALOGID=reits_scgk_oa&txtQueryDate={query_date}'
    # No proxy is used for this request. The timeout lets a stalled
    # connection fail so the retry decorator can take over.
    req = requests.get(url, headers=headers, timeout=30)
    try:
        return req.json()[0]['data']
    finally:
        # Release the connection even when the body is not valid JSON.
        req.close()

# Sample query date, presumably in the format txtQueryDate expects: 2021-06-21

def doJob():
    """Scrape the daily SZSE REITs market overview and export it to Excel.

    For every calendar day from 2022-01-01 through today the market-overview
    rows are fetched with ``getDataJson``, inserted into ``db_storage`` and
    collected; a five-second pause follows each day. Afterwards the fund
    list from ``getData`` is fetched and both tables are written to
    ``市场板块/深圳交易所市场概况.xlsx`` (sheets ``基础设施公募`` and ``基金列表``).

    The workbook is opened only after all data has been collected, so a
    failed run does not overwrite a previously exported file.

    NOTE(review): the MongoDB document omits the ``lbmc`` (基金品种) value that
    the Excel row contains — confirm whether that is intended.
    """
    log.info('=====开始采集=====')
    start_time = time.time()
    columns = ['基金品种', '产品数量(只)', '股份余额(万份)', '日成交份额(万份)', '日成交金额(万元)', '总市值(亿元)','日期']
    start_date = datetime(2022, 1, 1)
    end_date = datetime.today()
    date_range = [start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)]
    data_list = []
    for date in date_range:
        data_json = getDataJson(date)
        for data_ in data_json:
            data = [data_['lbmc'], data_['zqsl'], data_['zgb'], data_['cjsl'], data_['cjje'], data_['sjzz'],str(date)]
            dic = {
                '产品数量(只)':data_['zqsl'],
                '股份余额(万份)':data_['zgb'],
                '日成交份额(万份)':data_['cjsl'],
                '日成交金额(万元)':data_['cjje'],
                '总市值(亿元)':data_['sjzz'],
                '日期':str(date)
            }
            db_storage.insert_one(dic)
            log.info(f'{date}===采集成功')
            data_list.append(data)
        # Throttle requests between consecutive dates.
        time.sleep(5)
    # columns= keeps the frame well-formed even when no rows were collected.
    df_1 = pd.DataFrame(data_list, columns=columns)
    df_2 = getData()
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # is not available in pandas 2.x.
    with pd.ExcelWriter('市场板块/深圳交易所市场概况.xlsx') as writer:
        df_1.to_excel(writer, sheet_name='基础设施公募', index=False)
        df_2.to_excel(writer, sheet_name='基金列表', index=False)
    log.info(f'=====采集结束=====耗时{baseCore.getTimeCost(start_time,time.time())}')


if __name__ == '__main__':
    # Run the scrape; always call baseCore.close() afterwards, even when
    # doJob() raises, so the helper's cleanup is not skipped on failure.
    try:
        doJob()
    finally:
        baseCore.close()
