#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File    : Operation.py
# @Time    : 2023/3/21 08:25
# @Author  : bruxelles_li
# @Software: PyCharm
import os
import subprocess
import sys
from flask import Flask, request, jsonify
from pathlib import Path
import shutil
from shutil import Error
from tqdm import tqdm
import requests
import socket
import datetime
import pandas as pd
import glob

# Add the parent directory to the module search path so the project-local imports below resolve
sys.path.append('../')
from base.app.base_app import *
from File_Operation.smart_extractor import extract_by_html_test


# Define operation_prefix; it is used below as the name of the blueprint.
operation_prefix = "/platform/operation/process"  # upload, delete, test, publish
operation_file = Blueprint(f'{operation_prefix}', __name__)
UPLOAD_FOLDER = r'../datasets/classification/FastText-Model/'  # upload path (relative to the working directory)
# Make sure the upload root exists as soon as the module is imported.
Path(UPLOAD_FOLDER).mkdir(parents=True, exist_ok=True)

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Excel extensions that merge_df globs for when combining corpus files.
file_types = ['xls', 'xlsx']
# Copy the parent process's environment variables; the copy (with PYTHONPATH
# overridden) is passed to the subprocess started by publish().
env = os.environ.copy()
env['PYTHONPATH'] = "/home/python/anaconda3/envs/JXYQ@py39/lib/python3.9/site-packages"


# Locate the previous version directory and copy its Excel files into the new one
def find_dir(dir_path, folder_path):
    """Copy the Excel files of the second-newest entry of dir_path into folder_path.

    Entries of ``dir_path`` are ordered by their name with the first seven
    characters stripped, in descending order, and the SECOND entry is used
    as the source directory.  Every file in it whose name ends with
    ``.xlsx`` or ``.xls`` is copied into ``folder_path``.

    Raises IndexError when ``dir_path`` holds fewer than two entries.
    """
    entries = os.listdir(dir_path)
    entries.sort(key=lambda name: name[7:], reverse=True)
    # Index 1: the second entry after the descending sort.
    previous_dir = os.path.join(dir_path, entries[1])
    excel_names = [
        name for name in os.listdir(previous_dir)
        if name.endswith(('.xlsx', '.xls'))
    ]
    for name in excel_names:
        shutil.copy(os.path.join(previous_dir, name), folder_path)
    print("success")


def check_port(port):
    """Print whether ``port`` can currently be bound on localhost.

    A TCP socket is created and bound to ``('localhost', port)``; any
    socket error is reported as the port being in use.  Nothing is returned.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.bind(('localhost', port))
    except socket.error:
        status = f"Port {port} is already in use"
    else:
        status = f"Port {port} is available"
    print(status)


def get_available_port(start_port, end_port):
    """Return the first port in ``[start_port, end_port]`` that binds on localhost.

    Ports are probed in ascending order with a throw-away TCP socket that is
    closed before returning, so the port is only known to have been free at
    probe time.

    Raises Exception when no port in the range could be bound.
    """
    for candidate in range(start_port, end_port + 1):
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            probe.bind(('localhost', candidate))
        except socket.error:
            # Bind failed for this candidate; move on to the next one.
            pass
        else:
            return candidate
        finally:
            probe.close()
    raise Exception("No available ports in the specified range")


def get_random_available_port():
    """Return a port number picked by the OS by binding localhost port 0.

    The probing socket is closed before the function returns.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(('localhost', 0))
        # Element 1 of the bound address is the port that was assigned.
        bound_address = probe.getsockname()
        return bound_address[1]
    finally:
        probe.close()


def all_exist(avalue, bvalue):
    """Return the string "True" if every item of avalue is contained in some item of bvalue.

    Containment uses the ``in`` operator against each element of ``bvalue``
    (substring test for strings, membership for containers).  The result is
    the string ``"True"`` or ``"False"``, not a bool.
    """
    for needle in avalue:
        for haystack in bvalue:
            if needle in haystack:
                break
        else:
            # No element of bvalue contains this item.
            return "False"
    return "True"


def judge_len(file):
    """Return the string "True" when ``len(file)`` is 2 or 4, otherwise "False"."""
    return "True" if len(file) in (2, 4) else "False"


def merge_df(dataset_path):
    """Load every Excel file directly inside dataset_path into one de-duplicated DataFrame.

    Files are collected per extension listed in the module-level
    ``file_types``, read with ``pd.read_excel``, concatenated with a fresh
    index, and exact duplicate rows are dropped (first occurrence kept).
    """
    excel_paths = []
    for extension in file_types:
        pattern = os.path.join(dataset_path, f'*.{extension}')
        excel_paths += glob.glob(pattern)

    # Concatenate all sheets into a single frame.
    frames = [pd.read_excel(path) for path in excel_paths]
    merged = pd.concat(frames, ignore_index=True)
    # Drop duplicated rows, keeping the first occurrence.
    return merged.drop_duplicates(keep='first')


@operation_file.route('/show_file/', methods=['POST'])
def show():
    """List the entries of a directory named in the JSON request body.

    The body must contain ``file_path``.  Entries whose name contains
    ``"Origin-Model"`` are hidden; the remaining names are returned joined
    with ``;`` in ``resultData``.  An empty directory yields a fixed
    "empty" message instead.  Any exception produces a code-500 payload.
    """
    try:
        payload = json.loads(request.data.decode('utf-8'))
        entries = os.listdir(payload["file_path"])
        logger.info(entries)
        if not entries:
            listing = "当前查询的文件路径下内容为空!"
        else:
            visible = [name for name in entries if "Origin-Model" not in name]
            listing = ";".join(visible)
        result = {
            'handleMsg': 'Success',
            'code': 200,
            'logs': '处理成功！',
            "resultData": listing
        }
        app.logger.info(result)
        return jsonify(result)
    except Exception as e:
        result = {
            'handleMsg': 'Failure',
            'code': 500,
            'logs': '处理失败！当前查询的文件路径不存在，请选择正确的路径参数后重新操作' + str(e),
            "resultData": ""
        }
        app.logger.info(result)
        return jsonify(result)


@operation_file.route('/remove_file/', methods=['POST'])
def remove():
    """Delete the file or directory tree named in the JSON request body.

    The body must contain ``file_path`` and ``flag``.  When ``flag`` equals
    ``"/"`` the path is removed with ``os.remove``; for any other value the
    whole tree is removed with ``shutil.rmtree``.  Any exception produces a
    code-500 payload.
    """
    # NOTE(review): file_path comes straight from the request and is deleted
    # without any validation - confirm this endpoint is only reachable by
    # trusted callers.
    try:
        payload = json.loads(request.data.decode('utf-8'))
        target = payload["file_path"]
        flag = payload["flag"]
        delete = os.remove if flag == "/" else shutil.rmtree
        delete(target)
        result = {
            "code": 200,
            'handleMsg': 'Success',
            'resultData': '文件删除成功！',
            'logs': None
        }
        app.logger.info(result)
        return jsonify(result)
    except Exception as e:
        result = {
            "code": 500,
            'handleMsg': 'Failure',
            'resultData': None,
            'logs': '删除失败，当前文件不存在，请选择正确的文件路径参数后重新删除！' + str(e)
        }
        app.logger.info(result)
    return jsonify(result)


def _download_file(url, dest_path, chunk_size=8192, timeout=(10, 120)):
    """Stream ``url`` into ``dest_path``.

    The HTTP status is checked before anything is written, so an error page
    is never stored as if it were the requested file.  The response is used
    as a context manager so the connection is always released.

    Raises requests.RequestException on connection errors, timeouts, or a
    4xx/5xx status; OSError when the destination cannot be written.
    """
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(dest_path, "wb") as out:
            for chunk in response.iter_content(chunk_size=chunk_size):
                out.write(chunk)


# TODO: corpus upload is the first step of the workflow
@operation_file.route('/upload_file/', methods=['GET', 'POST'])
def upload_file():
    """Download a corpus file for a task into a new timestamped version folder.

    The JSON request body must contain:
        request_url: URL of the Excel file to download.
        task_id:     name of the task directory under UPLOAD_FOLDER.

    When the task directory already exists, the Excel files of the previous
    version folder are copied in (via find_dir), merged with the newly
    downloaded file (via merge_df) and replaced by a single de-duplicated
    ``<timestamp>.xlsx``.  Otherwise the downloaded file is stored as is.

    On failure, whatever this request created on disk is removed again so
    that a half-populated folder is not picked up as the "previous version"
    by the next upload.

    Returns a JSON payload whose ``resultData`` is the new folder name on
    success (code 200) or ``None`` on failure (code 500).
    """
    # NOTE(review): task_id and the file name taken from request_url are used
    # to build filesystem paths without validation - confirm callers are trusted.
    try:
        data = json.loads(request.data.decode('utf-8'))
        request_url = data["request_url"]   # e.g. http://host/group1/M00/01/A3/xxxx.xlsx
        task_id = data["task_id"]

        root_path = app.config['UPLOAD_FOLDER'] + task_id
        # Must be evaluated BEFORE the new folder is created below, because
        # creating it also creates root_path.
        had_previous_corpus = os.path.exists(root_path)

        # The "floder-" spelling is kept as is: find_dir strips a 7-character
        # prefix when ordering version folders, and existing folders use it.
        folder_name = "floder" + "-" + datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        folder_path = root_path + "/" + folder_name
        Path(folder_path).mkdir(parents=True, exist_ok=True)

        try:
            if had_previous_corpus:
                # Bring the previous version's Excel files into the new folder.
                find_dir(dir_path=root_path, folder_path=folder_path)

            filename = request_url.split("/")[-1]
            _download_file(request_url, os.path.join(folder_path, filename))

            if had_previous_corpus:
                # Merge old and new corpus files into one de-duplicated workbook.
                combined_df = merge_df(folder_path)
                app.logger.info("合并后的长度为%s", len(combined_df))
                merge_filename = datetime.datetime.now().strftime('%Y%m%d%H%M%S') + ".xlsx"
                combined_df.to_excel(os.path.join(folder_path, merge_filename), index=False)
                # Keep only the merged workbook.
                for file_name in os.listdir(folder_path):
                    if file_name != merge_filename:
                        os.remove(os.path.join(folder_path, file_name))
        except Exception:
            # Roll back what this request created; the error itself is
            # reported by the outer handlers.
            shutil.rmtree(folder_path if had_previous_corpus else root_path,
                          ignore_errors=True)
            raise

        result = {
            "code": 200,
            'handleMsg': 'Success',
            'logs': '文件上传成功！',
            "resultData": folder_name

        }
        app.logger.info(result)
        return jsonify(result)

    except Error as e:
        # shutil.Error raised while copying the previous version's files.
        result = {
            "code": 500,
            'handleMsg': 'Failure',
            'resultData': None,
            'logs': '上传失败，当前上传的语料版本名称已经存在！' + str(e)
        }
        app.logger.info(result)
        return jsonify(result)
    except Exception as e1:
        result = {
            "code": 500,
            'handleMsg': 'Failure',
            'resultData': None,
            'logs': '上传失败！' + str(e1)
        }
        app.logger.info(result)
        return jsonify(result)


@operation_file.route('/publish_version/', methods=['POST'])
def publish():
    """Start a model-serving subprocess for a trained model and return its URL.

    The JSON request body must contain:
        trainModelName: model version directory name.
        task_id:        task directory name under the model root.

    A free port in 3000-3050 is picked, the public IP is read from
    ``../config.json`` and ``../app/app_run.py`` is launched detached in its
    own session.  The response's ``resultData`` is the prediction URL on
    success (code 200) or ``None`` on failure (code 500).
    """
    try:
        data = json.loads(request.data.decode('utf-8'))
        model_version = data['trainModelName']
        task_id = data["task_id"]
        classification = r'../../../model_saved/classification/FastText-Model/'

        model_path = classification + task_id + "/" + model_version
        # The port is only known to be free at probe time; the child binds it later.
        micro_server_port = get_available_port(start_port=3000, end_port=3050)
        app.logger.info("publish: model_path=%s port=%s", model_path, micro_server_port)

        # Read the config BEFORE spawning, so a missing or invalid config
        # cannot leave an orphaned server behind a "failure" response.
        with open('../config.json', 'r', encoding='utf-8') as config_file:
            config_json = json.load(config_file)
        publish_url = 'http://{}:{}/platform/classification/FastText-Model/model_pred/'.format(config_json['ip'], micro_server_port)

        cmd = ['python', '../app/app_run.py', '-model_path', model_path, '-micro_server_port', str(micro_server_port)]
        # Start the server in the background.  Its standard streams go to
        # DEVNULL: nothing in this process ever reads them, and unread pipes
        # would block the child once the pipe buffer fills up.
        # start_new_session=True replaces preexec_fn=os.setsid.
        subprocess.Popen(cmd,
                         stdout=subprocess.DEVNULL,
                         stderr=subprocess.DEVNULL,
                         stdin=subprocess.DEVNULL,
                         shell=False,
                         close_fds=True,
                         start_new_session=True,
                         env=env,
                         executable=sys.executable
                         )
        result = {
            'handleMsg': 'Success',
            'code': 200,
            'logs': None,
            "resultData": publish_url
        }
    except Exception as e:
        app.logger.info(e)
        result = {
            'handleMsg': 'failure',
            'code': 500,
            'logs': '模型发布失败，请检查参数后重新发布！' + str(e),
            "resultData": None
        }
    app.logger.info(result)
    return jsonify(result)


def _remove_files_under(directory):
    """Delete every file found under ``directory`` (recursively); folders are kept."""
    for dirpath, _dirnames, filenames in os.walk(directory):
        for name in tqdm(filenames):
            os.remove(os.path.join(dirpath, name))


def _copy_files_flat(src_dir, dst_dir, suffixes=None):
    """Copy files found under ``src_dir`` (recursively) directly into ``dst_dir``.

    When ``suffixes`` is given, only files whose final extension (without
    the dot) is in it are copied; files without an extension are skipped.
    """
    for dirpath, _dirnames, filenames in os.walk(src_dir):
        for name in tqdm(filenames):
            if suffixes is not None:
                suffix = name.rsplit('.', 1)[-1] if '.' in name else ''
                if suffix not in suffixes:
                    continue
            shutil.copy(os.path.join(dirpath, name), dst_dir)


def _stash_serving_model(model_path, model_copy):
    """Empty ``model_path``, keeping a backup of its files in ``model_copy``.

    If the backup folder does not exist yet it is created and the files are
    moved into it; if it already exists the files are simply deleted.
    """
    if not os.path.exists(model_copy):
        Path(model_copy).mkdir(parents=True, exist_ok=True)
        for dirpath, _dirnames, filenames in os.walk(model_path):
            for name in tqdm(filenames):
                shutil.move(os.path.join(dirpath, name), model_copy)
    else:
        _remove_files_under(model_path)


def _predict_label(model_file, title, content):
    """POST one document to the local model-test service and return its label."""
    url = "http://localhost:4005/platform/classification/FastText-Model/model_test/"
    payload = json.dumps({
        "model_path": model_file,
        "title": title,
        "content": content
    })
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload)
    return json.loads(response.text)["result"]["label"]


def _run_url_test(request_url, model_file):
    """Extract title/content from a web page, classify it, and build the result payload."""
    try:
        dict_parse = extract_by_html_test(request_url)
        label = _predict_label(model_file, dict_parse["title"], dict_parse["content"])
    except Exception as e:
        return {
            'handleMsg': 'failure',
            'code': 500,
            'logs': "智能解析url 网页内容失败，请重新选择测试内容" + str(e),
            "resultData": None
        }
    return {
        'handleMsg': 'success',
        'code': 200,
        'logs': None,
        "resultData": {
            "title": dict_parse["title"],
            "content": dict_parse["content"],
            "label": label
        }
    }


def _run_excel_test(request_url, model_file):
    """Download an Excel file, classify each row, and build the result payload.

    The workbook must have ``title`` and ``content`` columns.  Rows whose
    prediction fails are logged and skipped.
    """
    filename = request_url.split("/")[-1]
    save4path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    # Check the HTTP status before writing, and always release the connection.
    with requests.get(request_url, stream=True, timeout=(10, 120)) as r:
        r.raise_for_status()
        with open(save4path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    data_df = pd.read_excel(save4path, keep_default_na=False).astype(str)
    predictions = []
    for _idx, row in tqdm(data_df.iterrows()):
        title = row["title"]
        content = row["content"]
        try:
            label = _predict_label(model_file, title, content)
        except Exception as e:
            # Best effort: one bad row must not abort the whole batch.
            app.logger.warning("model_test: skipping row, prediction failed: %s", e)
            continue
        predictions.append({
            "title": title,
            "content": content,
            "label": label
        })
    return {
        'handleMsg': 'success',
        'code': 200,
        'logs': None,
        "resultData": predictions
    }


@operation_file.route('/model_test/', methods=['POST'])
def model_test():
    """Test a trained model version against a URL or an Excel file.

    The JSON request body must contain:
        task_id:        task directory name under the model root.
        trainModelName: model version directory name.
        data_type:      ``"url"`` to test a single web page; anything else
                        treats ``request_url`` as an Excel file to download.
        request_url:    the page or file URL.

    The trained model's ``bin``/``json`` files are temporarily placed in the
    serving model directory, the local model-test service is called, and the
    serving directory is then restored from the backup copy.  Restoration
    runs in a ``finally`` block so a failed test cannot leave the test model
    in place.

    Returns a JSON payload with code 200 and the predictions, or code 500
    and an explanatory ``logs`` message.
    """
    try:
        data = json.loads(request.data.decode('utf-8'))
        task_id = data['task_id']
        model_version = data['trainModelName']
        data_type = data['data_type']
        request_url = data['request_url']
        # Model root path.
        classification = r'../../../model_saved/classification/FastText-Model/'
        # Directory the serving application loads its model files from.
        model_path = classification + "Origin-Model-2023_03_31-12_15_17"
        # Backup of the serving model files.
        model_copy = classification + "copy_model"
        # Directory holding the trained model to be tested.
        src_path = classification + task_id + "/" + model_version

        # Validate BEFORE touching the serving directory, so a wrong version
        # name cannot leave it empty.
        if not os.path.exists(src_path):
            result = {

                'handleMsg': 'failure',
                'code': 500,
                'logs': f'待测试模型版本-{model_version}不存在，请选择已有的模型版本进行测试！',
                "resultData": None
            }
            app.logger.info(result)
            return jsonify(result)

        _stash_serving_model(model_path, model_copy)
        try:
            # Put the trained model's files in place of the serving model.
            _copy_files_flat(src_path, model_path, suffixes=('bin', 'json'))
            model_file = model_path + "/" + "model.bin"
            if data_type.strip() == "url":
                result = _run_url_test(request_url, model_file)
            else:
                result = _run_excel_test(request_url, model_file)
        finally:
            # Always restore the serving model from the backup copy.
            _remove_files_under(model_path)
            _copy_files_flat(model_copy, model_path)

        app.logger.info(result)
        return jsonify(result)

    except Exception as e:
        app.logger.info(e)
        result = {
            'handleMsg': 'failure',
            'code': 500,
            'logs': '模型测试失败，请检查参数后重新测试！' + str(e),
            "resultData": None
        }
        app.logger.info(result)
        return jsonify(result)


if __name__ == '__main__':
    # When run as a script, only look up and print a free port in 3000-3050.
    port = get_available_port(3000, 3050)
    print(port)
    # Starting the Flask app from here is left disabled.
    # app.run(host=HOST, port=PORT, debug=DEBUG)
