import re
import time
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pymongo
import requests
from apscheduler.schedulers.blocking import BlockingScheduler

from retry import retry

from base import BaseCore

# Shared project helper; used in this file for logging and proxy lookup.
baseCore = BaseCore.BaseCore()
# MongoDB collection that receives the collected quotes
# (database ``RESCenter``, collection ``RETIsProdQuot``).
# NOTE(review): the server address and credentials are hard-coded in source —
# consider loading them from configuration or environment variables.
db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988').RESCenter[
    'RETIsProdQuot']
# Logger used by every function in this file.
log = baseCore.getLogger()

# HTTP headers sent with every request made by getData and getDataList.
# They present the client as a browser issuing an AJAX request.
# NOTE(review): 'Host' is pinned to www.szse.cn, but getData requests
# reits.szse.cn with these same headers — confirm the server accepts that.
headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json',
    'Host': 'www.szse.cn',
    'Pragma': 'no-cache',
    'Referer': 'http://www.szse.cn/market/product/list/all/index.html',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
    'X-Request-Type': 'ajax',
    'X-Requested-With': 'XMLHttpRequest',
}


# Fetch fund short names, fund codes and listing dates
@retry(tries=3, delay=3)
def getData():
    """Fetch the REITs fund list from the SZSE report API.

    Only the first page of the catalogue is requested (``PAGENO=1``,
    ``PAGESIZE=10``).

    Returns:
        list[list[str]]: one ``[short_name, fund_code, listing_date]`` entry
        per row of the response, each value stripped of surrounding
        whitespace. The short name is the text inside the ``<u>`` tag of the
        row's ``jjjcurl`` field.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
        IndexError: if a row's ``jjjcurl`` contains no ``<u>...</u>`` element
            or the JSON payload is an empty list.
        KeyError: if an expected field is missing from the payload.

    The ``retry`` decorator re-runs the function on failure, so these
    exceptions only reach the caller after the last attempt.
    """
    url = 'https://reits.szse.cn/api/report/ShowReport/data?SHOWTYPE=JSON&CATALOGID=reits_fund_list&PAGENO=1&PAGESIZE=10'
    # Without a timeout a stalled connection would block the scheduled job
    # indefinitely and never give the retry decorator a chance to run.
    req = requests.get(url, headers=headers, timeout=30)
    try:
        # Fail early with a clear HTTP error instead of a JSON decoding error.
        req.raise_for_status()
        req.encoding = req.apparent_encoding
        rows = req.json()[0]['data']
        return [
            [
                re.findall('<u>(.*?)</u>', row['jjjcurl'])[0].strip(),  # fund short name
                row['sys_key'].strip(),  # fund code
                row['ssrq'].strip(),  # listing date
            ]
            for row in rows
        ]
    finally:
        # Release the connection even when parsing fails.
        req.close()


# Fetch and store trading quotes for one fund
@retry(tries=3, delay=20)
def getDataList(code, start_date, end_date):
    """Download the quotes of one fund and insert the new ones into MongoDB.

    Args:
        code: fund code, sent to the API as ``txtDMorJC``.
        start_date: start of the query window; the first ten characters of
            ``str(start_date)`` are sent as ``txtBeginDate``.
        end_date: end of the query window; the first ten characters of
            ``str(end_date)`` are sent as ``txtEndDate``.

    Rows are processed in the reverse of the order returned by the API. A row
    whose (code, date, exchange) combination is already stored is skipped;
    every other row is inserted, followed by a three second pause.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
        ValueError: if a date or numeric field cannot be parsed.
        KeyError / IndexError: if the payload does not have the expected shape.

    The ``retry`` decorator re-runs the function on failure, so these
    exceptions only reach the caller after the last attempt.
    """
    # NOTE(review): the proxy is fetched but never passed to requests.get —
    # either wire it in via ``proxies=`` or drop this call.
    ip = baseCore.get_proxy()
    # First day of the current month, zero-padded like the other dates in the
    # URL and taken from a single clock reading.
    archiveDate = datetime.today().strftime('%Y-%m-01')
    url = f'http://www.szse.cn/api/report/ShowReport/data?SHOWTYPE=JSON&CATALOGID=1815_stock_snapshot&TABKEY=tab2&txtDMorJC={code}&txtBeginDate={str(start_date)[:10]}&txtEndDate={str(end_date)[:10]}&archiveDate={archiveDate}'
    # Without a timeout a stalled connection would block the job indefinitely.
    req = requests.get(url, headers=headers, timeout=30)
    try:
        # Fail early with a clear HTTP error instead of a JSON decoding error.
        req.raise_for_status()
        req.encoding = req.apparent_encoding
        data_json = req.json()[0]['data'][::-1]
    finally:
        # Release the connection even when the payload cannot be parsed.
        req.close()

    numeric_fields = ('qss', 'ks', 'zg', 'zd', 'ss', 'cjgs', 'cjje')
    for data_ in data_json:
        zqdm = data_['zqdm']
        zqjc = data_['zqjc']
        # Figures arrive with thousands separators, which float() rejects.
        figures = {field: data_[field].replace(',', '') for field in numeric_fields}
        jyrq = datetime.strptime(data_['jyrq'], '%Y-%m-%d')
        is_insert = db_storage.find_one({'code': zqdm, 'date': jyrq, 'exchange': '深圳证券交易所'})
        if is_insert:
            log.info(f'{code}==={jyrq}===已采集')
            continue
        dic_info = {
            'code': zqdm,  # fund code
            'shortName': zqjc,  # short name
            'opening': float(figures['ks']),  # opening price
            'max': float(figures['zg']),  # highest price
            'min': float(figures['zd']),  # lowest price
            'closed': float(figures['ss']),  # closing price
            'ytdClosed': float(figures['qss']),  # previous close
            'volume': float(figures['cjgs']),  # trading volume
            'amount': float(figures['cjje']),  # trading amount
            'totalValue': '',  # total market value (not provided here)
            'negoValue': '',  # negotiable market value (not provided here)
            'toRate': '',  # turnover rate (not provided here)
            'date': jyrq,  # trade date as datetime
            'strDate': jyrq.strftime('%Y-%m-%d'),  # trade date as string
            'country': '中国',  # country
            'exchange': '深圳证券交易所',  # exchange
            "currency": "CNY"  # currency
        }
        db_storage.insert_one(dic_info)
        log.info(f'{code}==={jyrq}===采集成功')
        # Pause after every insert (kept from the original; presumably throttling).
        time.sleep(3)


def doJob():
    """Collect the most recent five days of quotes for every listed fund.

    The fund list comes from ``getData``; if that call fails the error is
    logged and the run ends. Each fund is then passed to ``getDataList`` with
    a window from five days ago until now. A failure for one fund is logged
    and does not stop the remaining funds.
    """
    try:
        funds = getData()
    except Exception as e:
        log.error(f'基金列表获取失败==={e}')
        return

    log.info('开始采集')
    lookback = timedelta(days=5)
    for fund in funds:
        # Each entry is [short name, fund code, listing date].
        name, code = fund[0], fund[1]
        log.info(f'{code}==={name}===开始采集')
        # The window is recomputed per fund so it always ends at "now".
        current_date = datetime.now()
        start_date = current_date - lookback
        try:
            getDataList(code, start_date, current_date)
        except Exception as e:
            log.error(f'{code}==={start_date}-{current_date}===采集失败==={e}')


def task():
    """Schedule ``doJob`` with a cron trigger at 18:00 and start the scheduler.

    A ``BlockingScheduler`` is used, so ``scheduler.start()`` does not return
    while the scheduler is running. Any ``Exception`` raised by the scheduler
    is logged rather than propagated.
    """
    # Create the scheduler
    scheduler = BlockingScheduler()
    # Run once a day
    scheduler.add_job(doJob, 'cron', hour='18', minute=0, max_instances=2)
    try:
        scheduler.start()
    except Exception as e:
        # The exception is embedded in the message: passing it as an extra
        # positional argument without a %s placeholder makes a standard
        # logger fail while formatting the record.
        log.error(f'定时采集异常==={e}')


if __name__ == '__main__':
    # Running the file as a script starts the scheduler via task().
    # For a single immediate collection run, call doJob() here instead.
    task()
