#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time    : 2021/5/11 20:18
# @Author  : 程婷婷
# @FileName: XgboostClassifyModel.py
# @Software: PyCharm
from model.base.views.model.BaseModel import BaseModel
import numpy as np
from sklearn.cluster import KMeans
from sklearn import metrics


class KmeansModel(BaseModel):
    """K-means clustering model built on ``sklearn.cluster.KMeans``.

    Hyper-parameters are read from ``self.model_config`` (keys ``n_clusters``,
    ``init``, ``n_init`` and ``max_iter``). ``model_config`` and ``save`` are
    not defined in this class; presumably they are provided by ``BaseModel``
    -- confirm against the base class.
    """

    def __init__(self, config_path):
        super().__init__(config_path)

    @staticmethod
    def _count_samples(data):
        """Return the number of samples (rows) in ``data``."""
        shape = getattr(data, 'shape', None)
        if shape:
            return shape[0]
        return len(data)

    def _new_kmeans(self, n_clusters):
        """Create an unfitted KMeans estimator using the configured settings."""
        return KMeans(n_clusters=n_clusters,
                      init=self.model_config['init'],
                      n_init=self.model_config['n_init'],
                      max_iter=self.model_config['max_iter'])

    def chose_k(self, data, k_range=range(3, 20)):
        """Fit one KMeans per candidate K and keep the best silhouette score.

        Args:
            data: Samples to cluster, in any form accepted by ``KMeans.fit``.
            k_range: Iterable of candidate cluster counts. Defaults to
                ``range(3, 20)``, i.e. K = 3..19.

        Returns:
            The fitted ``KMeans`` estimator with the highest mean silhouette
            coefficient; the first one wins on ties.

        Raises:
            ValueError: If no candidate K can be evaluated on ``data``.
        """
        n_samples = self._count_samples(data)
        # silhouette_score is only defined for 2 <= n_labels <= n_samples - 1,
        # so larger candidates would raise instead of being compared.
        candidates = [k for k in k_range if 2 <= k <= n_samples - 1]

        best_k = None
        best_kmeans = None
        best_score = -1  # lower bound of the silhouette coefficient
        for n_clusters in candidates:
            kmeans = self._new_kmeans(n_clusters)
            labels = kmeans.fit_predict(data)  # train the clustering model
            # Duplicate points can make KMeans return fewer distinct clusters
            # than requested; with fewer than 2 the score is undefined.
            if np.unique(labels).size < 2:
                continue
            score = metrics.silhouette_score(data, labels)  # mean silhouette for this K
            if best_kmeans is None or score > best_score:
                best_k = n_clusters
                best_score = score
                best_kmeans = kmeans

        if best_kmeans is None:
            raise ValueError(
                'Cannot choose K: no candidate in %r is usable with %d samples'
                % (k_range, n_samples))
        print('=========已获得最优模型，共分为%d类=========' % best_k)
        return best_kmeans

    def building_model(self, data):
        """Fit the clustering model and group the samples by cluster label.

        If ``model_config['n_clusters']`` is falsy, K is selected with
        ``chose_k``; otherwise the configured value is used directly. The
        fitted model is then passed to ``self.save``.

        Args:
            data: Samples to cluster; must also support ``data[i]`` indexing.

        Returns:
            A tuple ``(model, data_cluster, result)`` where ``model`` is the
            fitted ``KMeans``, ``data_cluster[j]`` is the list of samples
            assigned to cluster ``j`` and ``result[j]`` is the list of their
            indices in ``data``, in ascending order.
        """
        n_clusters = self.model_config['n_clusters']
        if not n_clusters:
            model = self.chose_k(data)
        else:
            model = self._new_kmeans(n_clusters).fit(data)
            print('=========共分为%s类=========' % str(n_clusters))

        classes = model.labels_
        print(list(set(classes)))

        # Size by the largest label actually assigned, as before.
        n_groups = int(max(classes)) + 1
        data_cluster = [[] for _ in range(n_groups)]
        result = [[] for _ in range(n_groups)]
        # Single pass: each label is itself the index of its bucket.
        for index, label in enumerate(classes):
            result[label].append(index)
            data_cluster[label].append(data[index])

        self.save(model)
        return model, data_cluster, result
