
import configparser
import csv
import glob
import os
import shutil
import time

import pandas as pd
import redis
import requests
from datetime import datetime
from logRecord import LogRecord
import traceback

'''
海关商品详情下载流程
1.下载商品编码
2.对商品编码进行分组 
3.对商品编码进行重命名
4.拼接路径和创建文件名
5.进行数据的下载
6.去重文件中的 \r换行
7.进行文件的保存



海关下载数据类型和参数分类组合
CODE_TS #商品编码  ORIGIN_COUNTRY  #贸易伙伴 TRADE_MODE #贸易方式 TRADE_CO_PORT #收发货地址
1.设置进出口类型 （默认进口，出口，进出口都进行下载）采用遍历的方式
2.设置查询起止时间 默认最新一个月的单月数据，和累计的数据下载 
3.设置币种 默认是usd  
4.查询字段分组 1.商品详情 四个都设置
5.单个统计数据下载 下载单个分组的数据
6.排序方式，使用默认的编码排序

7.下载文件路径设置和命名规则
d:/hg/2023/7/
数据默认存储位置 D://hg 
其它路径从参数中读取 
一级 年份 
二级月份 
三级月份类型单月，累计 
四级 币种 
五级 字段分组
六级 文件名

3、采集单个字段的统计数据

4.临时文件
1）将请求下载的文件放到临时目录中，
2）对临时的目录文件进行数据的过滤修改重命名保存到对应目录下
3）将临时文件删除
4）根据文件名和列表记录做对比，来下载缺失的文件

5.数据下载分类 
1）按照类型分组获取对应的每月的最新编码信息
2）根据字段编码和商品进行对应统计信息的下载
3）根据商品编码下载数据
'''

# Module-level LogRecord instance (from the project-local logRecord module).
# NOTE(review): `log` is not referenced by any code visible in this file — confirm it is still needed.
log=LogRecord()
class HgDownFile(object):

    def __init__(self):
        """Load ``config.ini`` and open the Redis connection used for cookies.

        Sets three attributes: ``downUrl`` (download endpoint), ``config``
        (the parsed ``config.ini``) and ``r`` (Redis client on db 0 whose
        host, port and password come from the ``[redis]`` section).
        """
        self.downUrl = "http://stats.customs.gov.cn/queryData/downloadQueryData"

        # Parse the configuration file from the current working directory.
        parser = configparser.ConfigParser()
        parser.read('config.ini')
        self.config = parser

        redis_settings = {
            'host': parser.get('redis', 'host'),
            'port': parser.get('redis', 'port'),
            'password': parser.get('redis', 'pass'),
            'db': 0,
        }
        self.r = redis.Redis(**redis_settings)
    def getcookie(self):
        cookie=self.r.spop('hgcookie')
        # cookie=self.r.srandmember('hgcookie')
        while cookie is None:
            time.sleep(10)
            cookie=self.r.srandmember('hgcookie')
            if cookie is not None:
                break
        cookie=cookie.decode('utf-8')
        cookie=cookie.strip('"')
        return cookie
    #请求下载文件
    def reqDownFile(self,data):
        """POST the query to the download endpoint and save the CSV reply.

        The request is repeated until the server answers with HTTP 200 and the
        body has been written to the temporary file. A cookie is obtained from
        ``getcookie`` for every attempt.

        Args:
            data: form parameters, e.g. as built by ``setparam`` or
                ``setcodesAndProductparam``.

        Returns:
            Path of the temporary CSV file that was written.

        NOTE(review): there is no pause or attempt limit between retries, so a
        persistent non-200 answer is retried immediately and indefinitely.
        """
        tmp_dir = 'D:\\hg\\tmp'
        # Build the target path once. The original re-joined the directory onto
        # the already joined name on every retry that followed a failed write.
        filename = os.path.join(tmp_dir, '数据文件.csv')
        statuscode = 410
        while statuscode != 200:
            try:
                header = {
                    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
                    'Accept-Encoding':'gzip, deflate',
                    'Accept-Language':'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
                    'Cache-Control':'max-age=0',
                    'Content-Type':'application/x-www-form-urlencoded',
                    'Host':'stats.customs.gov.cn',
                    'Origin':'http://stats.customs.gov.cn',
                    'Proxy-Connection':'keep-alive',
                    'Upgrade-Insecure-Requests':'1',
                    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.64',
                    'Cookie': self.getcookie()
                }
                response = requests.post(url=self.downUrl, data=data, headers=header, verify=False, timeout=20)
                # The body is decoded as GB2312 rather than the detected encoding.
                response.encoding = 'GB2312'
                statuscode = response.status_code
                if statuscode == 200:
                    try:
                        csv_content = response.text
                        count = csv_content.count("\n")
                        # Drop NUL bytes and carriage returns before saving.
                        csv_content = csv_content.replace('\0', '').replace('\r', '')
                        print(count)
                        os.makedirs(tmp_dir, exist_ok=True)
                        # Explicit GBK: verifyFile parses this file with encoding='gbk'.
                        with open(filename, 'w', encoding='gbk') as file:
                            file.write(csv_content)
                        print('CSV文件下载保存成功！')
                        break
                    except Exception as e:
                        print(e)
                        # Any code other than 200 keeps the retry loop going.
                        statuscode = 411
                else:
                    print('CSV文件下载保存失败！')

            except Exception as e:
                print(data)
                print(e)
                statuscode = 412
                continue
            print(f'statuscode:{statuscode}')
        return filename
    #统计数据的文件路径设置单个字段
    def filepath(self,iEType,currencyType,year,startMonth,endMonth,outerField1):
        path=self.config.get('param', 'path')
        field_name=self.getfieldName(outerField1)
        iEType_name=self.getiETypeName(iEType)
        if startMonth<endMonth:
            start_str = '01'
            end_str = "{:02d}".format(endMonth)
            try:
                filename=str(year)+start_str+'-'+end_str+'--'+field_name+'--'+iEType_name+'.csv'
                filepath=path+str(year)+'\\'+end_str+'\\累计\\'
            except Exception as e:
                print(e)
        else:
            end_str = "{:02d}".format(endMonth)
            filename=str(year)+end_str+'--'+field_name+'--'+iEType_name+'.csv'
            filepath=path+str(year)+'\\'+end_str+'\\单月\\'

        save_dir = os.path.dirname(filepath)
        os.makedirs(save_dir, exist_ok=True)
        filename = os.path.join(save_dir, filename)

        return filename

    # Path of the detail CSV downloaded for one group of commodity codes
    def codeFilepath(self,iEType,currencyType,year,startMonth,endMonth,outerField1,code):
        path=self.config.get('param', 'path')
        field_name=self.getfieldName(outerField1)
        iEType_name=self.getiETypeName(iEType)
        if startMonth<endMonth:
            start_str = '01'
            end_str = "{:02d}".format(endMonth)
            filename=str(year)+"年--"+start_str+"月-"+end_str+"月--"+iEType_name+"--商品-贸易伙伴-贸易方式-注册地--"+str(code)+".csv";
            filepath=path+str(year)+'\\'+end_str+'\\累计\\'+field_name+'\\'
        else:
            end_str = "{:02d}".format(endMonth)
            filename=str(year)+"年--"+end_str+"月--"+iEType_name+"--商品-贸易伙伴-贸易方式-注册地--"+str(code)+".csv";
            filepath=path+str(year)+'\\'+end_str+'\\单月\\'+field_name+'\\'
        save_dir = os.path.dirname(filepath)
        os.makedirs(save_dir, exist_ok=True)
        filename = os.path.join(save_dir, filename)
        return filename

    # Path of the CSV that records which commodity codes each detail file covers
    def codeFilepathMsg(self,iEType,currencyType,year,startMonth,endMonth,outerField1):
        path=self.config.get('param', 'path')
        field_name=self.getfieldName(outerField1)
        iEType_name=self.getiETypeName(iEType)
        if startMonth<endMonth:
            start_str = '01'
            end_str = "{:02d}".format(endMonth)
            fn=str(year)+"年--"+start_str+"月-"+end_str+"月--"+iEType_name+"_文件统计.csv";
            filepath=path+str(year)+'\\'+end_str+'\\'
        else:
            end_str = "{:02d}".format(endMonth)
            fn=str(year)+"年--"+end_str+"月--"+iEType_name+"_文件统计.csv";
            filepath=path+str(year)+'\\'+end_str+'\\'

        save_dir = os.path.dirname(filepath)
        os.makedirs(save_dir, exist_ok=True)
        filemsg = os.path.join(save_dir, fn)
        return filemsg

    def getfieldName(self,outerField1):
        """Translate a grouping-field identifier into its Chinese folder label.

        Membership is tested with ``in``, so ``outerField1`` may be a string
        (substring match) or a container of identifiers. The first matching
        identifier wins; an empty string is returned when none matches.
        """
        labels = (
            ('CODE_TS', '商品'),              # commodity
            ('ORIGIN_COUNTRY', '贸易伙伴'),    # trading partner
            ('TRADE_MODE', '贸易方式'),        # trade mode
            ('TRADE_CO_PORT', '收发货地址'),   # domestic province
        )
        for identifier, label in labels:
            if identifier in outerField1:
                return label
        return ''

    def getiETypeName(self,iEType):
        """Translate a trade-direction code into its Chinese label.

        ``0`` is export (``出口``), ``1`` is import (``进口``) and ``10`` is
        import plus export (``进出口``). Any other value yields ``''``.
        """
        for direction_code, label in ((0, '出口'), (1, '进口'), (10, '进出口')):
            if direction_code == iEType:
                return label
        return ''

    #单个字段的参数设置
    def setparam(self,iEType,currencyType,year,startMonth,endMonth,outerField1):
        """Build the form parameters for a single-field statistics query.

        ``selectTableState`` is chosen as follows: ``2`` for years before 2022
        and for 2022 queries ending in January, ``3`` for 2022 queries whose
        start month is before the end month, and ``1`` otherwise.

        Args:
            iEType: trade direction code.
            currencyType: currency identifier sent to the server.
            year: query year (int).
            startMonth: first month of the query.
            endMonth: last month of the query.
            outerField1: the single grouping field.

        Returns:
            Dict of form fields. ``outerValue1`` is a fixed list of commodity
            codes.
        """
        if year < 2022:
            table_state = 2
        else:
            first = int(startMonth)
            last = int(endMonth)
            table_state = 1
            if year == 2022:
                if first < last:
                    # 2022 cumulative queries get their own state value.
                    table_state = 3
                elif last == 1:
                    table_state = 2

        commodity_codes = '87036011,87036012,87036013,87036019,87036021,87036022,87036023,87036029,87036031,87036032,87036033,87036039,87036041,87036042,87036043,87036049,87036051,87036052,87036053,87036059,87036061,87036062,87036063,87036069,87036071,87036072,87036073,87036079,87037011,87037012,87037013,87037019,87037021,87037022,87037023,87037029,87037031,87037032,87037033,87037039,87037041,87037042,87037043,87037049,87037051,87037052,87037053,87037059,87037061,87037062,87037063,87037069,87037071,87037072,87037073,87037079,40111000,40112000,40121100,40121200,40122010,40129020,40131000,70071190,70072190,70091000,85229091,85269110,85272100,85272900,85392130,85392930,94019910,28046117,28046119,38180011,38180019,85044030,85414200,85414300,84723090,84729040,85258120,85258220,85258320,85258921,85258922,85258923,85258929,85286210,85286220,85286290,85286910,85286990,90065310,90065390,90065930,90065941,90065949'

        return {
            'pageSize': 10,
            'iEType': iEType,
            'currencyType': currencyType,
            'year': year,
            'startMonth': startMonth,
            'endMonth': endMonth,
            'monthFlag': '',
            'unitFlag': False,
            'unitFlag1': False,
            'codeLength': '8',
            'outerField1': outerField1,
            'outerField2': '',
            'outerField3': '',
            'outerField4': '',
            'outerValue1': commodity_codes,
            'outerValue2': '',
            'outerValue3': '',
            'outerValue4': '',
            'orderType': 'CODE ASC DEFAULT',
            'selectTableState': table_state,
            'currentStartTime': '202202',
        }

    #联合查询字段的参数设置
    def setcodesAndProductparam(self,iEType,currencyType,year,startMonth,endMonth,outerField1,filedCode):
        """Build the form parameters for the combined four-field detail query.

        The four grouping fields are fixed (``CODE_TS``, ``ORIGIN_COUNTRY``,
        ``TRADE_MODE``, ``TRADE_CO_PORT``); ``outerField1`` is accepted but not
        used. ``selectTableState`` is ``2`` for years before 2022 and for 2022
        queries ending in January, ``3`` for 2022 queries whose start month is
        before the end month, and ``1`` otherwise.

        Args:
            iEType: trade direction code.
            currencyType: currency identifier sent to the server.
            year: query year (int).
            startMonth: first month of the query.
            endMonth: last month of the query.
            outerField1: unused.
            filedCode: commodity code filter placed in ``outerValue1``.

        Returns:
            Dict of form fields.
        """
        if year < 2022:
            table_state = 2
        else:
            first = int(startMonth)
            last = int(endMonth)
            table_state = 1
            if year == 2022:
                if first < last:
                    # 2022 cumulative queries get their own state value.
                    table_state = 3
                elif last == 1:
                    table_state = 2

        return {
            'pageSize': 10,
            'iEType': iEType,
            'currencyType': currencyType,
            'year': year,
            'startMonth': startMonth,
            'endMonth': endMonth,
            'monthFlag': '',
            'unitFlag': False,
            'unitFlag1': False,
            'codeLength': '8',
            'outerField1': 'CODE_TS',
            'outerField2': 'ORIGIN_COUNTRY',
            'outerField3': 'TRADE_MODE',
            'outerField4': 'TRADE_CO_PORT',
            'outerValue1': filedCode,
            'outerValue2': '',
            'outerValue3': '',
            'outerValue4': '',
            'orderType': 'CODE ASC DEFAULT',
            'selectTableState': table_state,
            'currentStartTime': '202202',
        }

    #将临时文件放复制到目录中
    def tmpToFile(self,tmpfilename,filePathName):
        # 打开csv文件
        with open(tmpfilename, 'r') as file:
            # 创建csv阅读器
            csv_reader = csv.reader(file)
            # 使用len()函数获取行数
            line_count = len(list(csv_reader))
        if line_count > 9990:
            print('csv文件行数过大需要对编码进行拆分')
            os.remove(tmpfilename)
            return ''
        else:
            shutil.copy(tmpfilename, filePathName)
            os.remove(tmpfilename)

        return   filePathName
    # Check whether the temporary CSV is small enough (at most 9990 rows) to keep
    def tmpFileLength(self,tmpfilename):
        flag=True
        # 打开csv文件
        with open(tmpfilename, 'r') as file:
            # 创建csv阅读器
            csv_reader = csv.reader(file)
            # 使用len()函数获取行数
            line_count = len(list(csv_reader))
        if line_count > 9990:
            print('csv文件行数过大需要对编码进行拆分')
            flag=False

        return  flag

    def readcsv(self,filePath):
        codes=[]
        with open(filePath, newline='') as csvfile:
            reader = csv.reader(csvfile)
            #跳过第一条数据
            next(reader)
            for row in reader:
                # print(row[0])
                codes.append(str(row[0]))
        return codes
    #下载获取字段的编码信息
    def field1Down(self,year,endMonth):
        fieldFileList=[]
        current_date = datetime.now()
        # year = current_date.year
        # year = int(self.config.get('param', 'year'))
        year = int(year)
        month = current_date.month
        iETypes=[0,1,10]
        outerFields=['CODE_TS']
        # outerFields=['CODE_TS']
        currencyType='usd'
        # endMonth=self.r.get('newMonth')
        # endMonth=int(endMonth.decode('utf-8'))
        # endMonth=int(self.config.get('param', 'endMonth'))
        # if endMonth != (month-1):
        #     return
        if endMonth==1:
            startMonths=[1]
        else:
            startMonths=[1,endMonth]
        for startMonth in startMonths:
            for iEType in iETypes:
                for outerField1 in outerFields:
                    param=self.setparam(iEType,currencyType,year,startMonth,endMonth,outerField1)
                    filePathName=self.filepath(iEType,currencyType,year,startMonth,endMonth,outerField1)
                    fieldFileList.append(filePathName)
                    if os.path.exists(filePathName):
                        continue
                    tmpfilename=self.reqDownFile(param)
                    saveFileName=self.tmpToFile(tmpfilename,filePathName)
                    print(saveFileName)

        return fieldFileList

    #下载商品编码的内容信息
    def fieldCodeDown(self,iEType,currencyType,year,startMonth,endMonth,outerField1,codes):
        codeFileList=[]
        #对数据进行变量分组
        codeList=self.group_elements(codes)
        for k in range(0,len(codeList)):
            code=codeList[k]
            filecodes='cc'+str(k)
            #拼接参数
            param=self.setcodesAndProductparam(iEType,currencyType,year,startMonth,endMonth,outerField1,code)
            #生成参数对应的文件路径
            filePathName=self.codeFilepath(iEType,currencyType,year,startMonth,endMonth,outerField1,filecodes)
            if os.path.exists(filePathName):
                print(f'文件已存在{filePathName}')
                codeFileMsg={
                    '文件名':filePathName,
                    '商品编码':code,
                }
                codeFileList.append(codeFileMsg)
                continue
            tmpfilename=self.reqDownFile(param)
            fflag=self.tmpFileLength(tmpfilename)
            #判断文件行数是否接近1万
            if fflag: #小于1万保存数据
                #校验临时的金额是否跟统计文件中的对应
                flagg=self.verifyFile(tmpfilename,year,startMonth,endMonth,outerField1,iEType,currencyType)
                #将临时文件的数据复制到指定文件中
                if flagg:
                    #将下载的临时文件复制到规定的文件中
                    saveFileName=self.tmpToFile(tmpfilename,filePathName)
                else:
                    saveFileName=''
            else:
                saveFileName=''
            #文件行数超过接近1万时需要对编码进行拆分进行重新下载
            if saveFileName=='':
                cds=code.split(',')
                for j in range(0,len(cds)):
                    code=cds[j]
                    filecodes='cc'+str(k)+'_'+str(j)
                    #拼接参数
                    param=self.setcodesAndProductparam(iEType,currencyType,year,startMonth,endMonth,outerField1,code)
                    #生成参数对应的文件路径

                    filePathName=self.codeFilepath(iEType,currencyType,year,startMonth,endMonth,outerField1,filecodes)
                    if os.path.exists(filePathName):
                        print(f'文件已存在{filePathName}')
                        codeFileMsg={
                            '文件名':filePathName,
                            '商品编码':code,
                        }
                        codeFileList.append(codeFileMsg)
                        continue
                    tmpfilename=self.reqDownFile(param)
                    #校验临时的金额是否跟统计文件中的对应
                    flagg=self.verifyFile(tmpfilename,year,startMonth,endMonth,outerField1,iEType,currencyType)
                    #将临时文件的数据复制到指定文件中
                    if flagg:
                        #将下载的临时文件复制到规定的文件中
                        saveFileName=self.tmpToFile(tmpfilename,filePathName)
                    # #将下载的临时文件复制到规定的文件中
                    # saveFileName=self.tmpToFile(tmpfilename,filePathName)

            print(saveFileName)
            codeFileList.append(saveFileName)
            filemsg=self.codeFilepathMsg(iEType,currencyType,year,startMonth,endMonth,outerField1)
        return codeFileList,filemsg


    def verifyFile(self,tmpfilename,year,startMonth,endMonths,outerField1,iEType,currencyType):
        flag=False
        path='D:\\hg\\'
        years=year
        endMonths=endMonths
        end_str=int(endMonths)
        startMonths=startMonth
        if startMonths<=end_str:
            try:
                filePathName=self.filepath(iEType,currencyType,year,startMonth,endMonth,outerField1)
            except :
                traceback.print_exc()
            try:
                dfAll = pd.read_csv(filePathName, encoding='gbk',dtype=str)
                dfAll['美元'] = dfAll['美元'].str.replace(',', '').astype(float)
                ddf = pd.read_csv(tmpfilename, encoding='gbk',dtype=str)
                ddf['美元']=pd.to_numeric(ddf['美元'].str.replace(',', '').astype(float))
                column_sum = ddf.groupby('商品编码')['美元'].sum()
                sumList=column_sum.reset_index().values.tolist()
                for codesum in sumList:
                    codeId=codesum[0]
                    cvalue=codesum[1]
                    row =dfAll.loc[dfAll['商品编码']==codeId]
                    try:
                        usvalue = row.at[row.index[-1], '美元']
                        if usvalue==cvalue:
                            flag=True
                    except Exception as e:
                        print(e)
            except Exception as e22:
                print(e22)
        return flag

    #详情商品信息参数拼接
    def codeFieldDown(self,fieldFileList,year,endMonth):
        """Download the detail files for every per-field statistics file.

        The query period and trade direction are recovered from markers in
        each path: ``累计`` means start month 1, ``单月`` means the start month
        equals ``endMonth``, and ``--进口`` / ``--出口`` / ``--进出口`` select
        direction 1 / 0 / 10.

        Fixes compared with the previous version:
          * methods are called on ``self`` instead of the module-level name
            ``hgDownFile``, which only exists when the file runs as a script;
          * a path without the expected markers is skipped instead of reusing
            the previous iteration's values;
          * ``filemsg`` is always bound, so an empty list or an all-failed
            run returns ``([], '')`` instead of raising ``UnboundLocalError``.

        NOTE(review): as before, only the result of the LAST successfully
        processed file is returned — confirm whether the results of all files
        should be reported.

        Args:
            fieldFileList: paths of per-field statistics CSVs.
            year: query year; converted with ``int``.
            endMonth: last month of the query.

        Returns:
            ``(codeFileList, filemsg)`` as returned by ``fieldCodeDown`` for
            the last file that was processed without an exception.
        """
        year = int(year)
        currencyType = 'usd'
        codeFileList = []
        filemsg = ''
        for fieldFile in fieldFileList:
            # Same precedence as the old sequential checks: 累计 overrides 单月.
            if '累计' in fieldFile:
                startMonth = 1
            elif '单月' in fieldFile:
                startMonth = endMonth
            else:
                startMonth = None

            # The last matching marker used to win, so test in reverse order.
            if '--进出口' in fieldFile:
                iEType = 10
            elif '--出口' in fieldFile:
                iEType = 0
            elif '--进口' in fieldFile:
                iEType = 1
            else:
                iEType = None

            if startMonth is None or iEType is None:
                print(f'skip {fieldFile}: period or direction marker not found')
                continue

            try:
                outerField1 = ['CODE_TS']
                codes = self.readcsv(fieldFile)  # commodity codes to download
                codeFileList, filemsg = self.fieldCodeDown(iEType,currencyType,year,startMonth,endMonth,outerField1,codes)
            except Exception:
                # Best effort per file: report the error and go on.
                traceback.print_exc()
                continue

        return codeFileList, filemsg
    def group_elements(self,codes):
        """Join the codes into comma-separated groups of up to eight.

        Args:
            codes: list of code strings.

        Returns:
            List of strings, each holding up to eight codes joined by ``,``.
            An empty input gives an empty list.
        """
        chunk = 8
        joined = []
        for start in range(0, len(codes), chunk):
            joined.append(','.join(codes[start:start + chunk]))
        return joined

    def outfilemsg(self,codeFileList,filemsg):
        # 输出字典数据到CSV文件
        with open(filemsg, 'w', newline='') as file:
            fieldnames = ['文件名', '商品编码']
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(codeFileList)

# Script entry point: for every configured year / end-month pair, download the
# per-field statistics files, then the per-code detail files, and finally write
# the file-name / commodity-code mapping.
if __name__ == '__main__':
    # NOTE(review): `hgDownFile`, `year` and `endMonth` are module-level names.
    # Methods of HgDownFile may resolve `hgDownFile` / `endMonth` as globals, so
    # confirm that before moving this code into a function or renaming them.
    hgDownFile=HgDownFile()

    # Both settings are comma-separated lists in the [param] section.
    yss=hgDownFile.config.get('param', 'year')
    ss=hgDownFile.config.get('param', 'endMonth')

    # newMonth = hgDownFile.r.get("newMonth").decode('utf-8')
    # ms = "{:02d}".format(int(ss))
    # Mark the collection as running (disabled)
    # hgDownFile.r.set("newhgdatastatus"+yss+"_"+ms,1)
    try:
        for ye in yss.split(','):
            year=int(ye)
            for s in ss.split(','):
                endMonth=int(s)
                print(f'year:{year} end:{endMonth}')
                try:
                    fieldFileList=hgDownFile.field1Down(year,endMonth)
                    # January has only the single-month period (3 files, one per
                    # direction); other months also have the cumulative period
                    # (6 files). The loops repeat the download until the list
                    # has that many entries.
                    # NOTE(review): field1Down lists a path for every combination
                    # it iterates, so these loops look like they never run — verify.
                    if endMonth==1:
                        while len(fieldFileList)< 3:
                            fieldFileList=hgDownFile.field1Down(year,endMonth)
                            if len(fieldFileList)>= 3:
                                break
                    else:
                        while len(fieldFileList)< 6:
                            fieldFileList=hgDownFile.field1Down(year,endMonth)
                            if len(fieldFileList)>= 6:
                                break
                    # Two passes over the same list; presumably the second pass
                    # retries files that failed in the first — TODO confirm.
                    for i in range(1,3):
                        codeFileList,filemsg=hgDownFile.codeFieldDown(fieldFileList,year,endMonth)
                    # Write the file-name / commodity-code mapping to the stats file
                    hgDownFile.outfilemsg(codeFileList,filemsg)
                except Exception as ee:
                    # Errors for one year/month pair are printed; the loop goes on.
                    print(ee)
    except  Exception as e:
        print(e)
        # hgDownFile.r.set("newhgdatastatus"+yss+"_"+ms,3)
    # Mark the collection as finished (disabled)
    # hgDownFile.r.set("newhgdatastatus"+yss+"_"+ms,2)



