#coding:utf-8
# Baidu Translate: translates up to 1000 characters when not logged in, 5000 when logged in
import re
import string
import time
from urllib.parse import quote

import psutil
import pymongo
from bs4 import BeautifulSoup
from bson import ObjectId
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.support.wait import WebDriverWait
# from selenium.webdriver.chrome.service import Service
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.proxy import Proxy, ProxyType
from func_timeout import func_set_timeout
from base.BaseCore import BaseCore

# Shared project helper instance; this module uses it for proxy lookup
# (get_proxy) and for language detection (detect_language).
baseCore = BaseCore()


class Translate():
    def __init__(self):
        """Set up the base URL, request header, browser session and Mongo collection.

        Creating an instance starts a Firefox WebDriver (via ``createDriver``)
        and builds a MongoDB client, so construction has side effects.
        """
        self.url = "https://fanyi.baidu.com/#"

        user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1"
        self.header = {"User-Agent": user_agent}

        self.browser = self.createDriver()

        # NOTE(review): host and credentials are hard-coded in source; consider
        # loading them from configuration instead.
        client = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988')
        database = client.中科软
        self.db_storage = database['数据源_0106']

    def close(self):
        """Quit the WebDriver session held in ``self.browser``."""
        driver = self.browser
        driver.quit()

    def is_website_link(self,string):
        pattern = r"^(http|https)?(://)?[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+(/[a-zA-Z0-9-_.?=/]*)?$"
        if re.match(pattern, string):
            return True
        else:
            return False

    def createDriver(self):
        """Start a Firefox WebDriver that routes traffic through a proxy.

        The proxy URL is read from ``baseCore.get_proxy()['http']`` and used
        for both the HTTP and the SSL proxy preferences. Preferences are set on
        ``Options`` rather than on a ``FirefoxProfile`` passed through the
        ``firefox_profile`` keyword, because that keyword is deprecated in
        Selenium 4 and no longer accepted by newer 4.x releases.

        Returns:
            The newly created ``webdriver.Firefox`` instance.

        Raises:
            ValueError: if the proxy URL does not contain a host and a port.
        """
        from urllib.parse import urlsplit

        proxy_url = baseCore.get_proxy()['http']
        # Parse once; expected shape is scheme://host:port.
        parts = urlsplit(proxy_url)
        host, port = parts.hostname, parts.port
        if not host or port is None:
            raise ValueError(f'proxy URL must look like scheme://host:port, got {proxy_url!r}')

        options = Options()
        # 1 = manual proxy configuration.
        options.set_preference('network.proxy.type', 1)
        options.set_preference('network.proxy.http', host)
        options.set_preference('network.proxy.http_port', port)
        options.set_preference('network.proxy.ssl', host)
        options.set_preference('network.proxy.ssl_port', port)
        options.set_preference("general.useragent.override",
                               "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3")

        # NOTE(review): machine-specific geckodriver path; consider making it configurable.
        service = Service(r'F:\spider\firefox\geckodriver_1.exe')
        return webdriver.Firefox(service=service, options=options)

    def kill_firefox(self):
        """Kill every running process whose name is exactly ``firefox.exe``.

        Best effort: processes that vanish, deny access or are zombies while
        being inspected are skipped silently.
        """
        ignorable = (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess)
        for process in psutil.process_iter():
            try:
                is_firefox = process.name() == "firefox.exe"
                if is_firefox:
                    process.kill()
            except ignorable:
                continue

    def translate(self, sentence, lang, max_retries=None):
        """Translate ``sentence`` through the browser session and return the result text.

        The sentence is first typed into the page by
        ``get_input_language_type``; the result paragraph is then read from the
        page. On any failure the browser is restarted and the attempt is
        repeated. The retry is a loop rather than recursion, so a long run of
        failures can no longer end in ``RecursionError``.

        Args:
            sentence: Text to translate.
            lang: Source-language code supplied by the caller.
            max_retries: Maximum number of retries after the first attempt.
                ``None`` (the default) retries until an attempt succeeds,
                matching the previous behaviour.

        Returns:
            The stripped text of the result element, or ``None`` when
            ``get_input_language_type`` returns a falsy value.

        Raises:
            RuntimeError: if ``max_retries`` is set and every attempt failed.
        """
        result_locator = (
            By.XPATH,
            '//*[@id="main-outer"]/div/div/div[1]/div[2]/div[1]/div[2]/div/div/div[1]/p[2]',
        )
        failures = 0
        while True:
            # Bind the wait to the current browser; it is replaced after a failure.
            wait = WebDriverWait(self.browser, 20)
            try:
                word_type = self.get_input_language_type(sentence, wait)
                if not word_type:
                    return None
                # NOTE(review): self.url as set in __init__ has no "{}" placeholders,
                # so format() returns it unchanged and the arguments are ignored.
                # Kept as is; confirm whether a "#{}/{}/{}" template was intended.
                url = self.url.format(lang, 'zh', sentence)
                url = quote(url, safe='/:#')
                self.browser.set_page_load_timeout(10)
                self.browser.get(url)
                # until() returns the located element, so no second lookup is needed.
                element = wait.until(EC.presence_of_element_located(result_locator))
                return element.text.strip()
            except (KeyboardInterrupt, SystemExit):
                # Never swallow an explicit request to stop the program.
                raise
            except BaseException as err:
                # BaseException (not Exception) so that a timeout raised by the
                # @func_set_timeout wrapper is still retried even if it does not
                # derive from Exception.
                failures += 1
                if max_retries is not None and failures > max_retries:
                    raise RuntimeError(
                        f'translation failed after {failures} attempt(s)') from err
                try:
                    self.browser.quit()
                except Exception:
                    # The session may already be dead; a fresh driver is created anyway.
                    pass
                self.browser = self.createDriver()

    @func_set_timeout(90)
    def get_input_language_type(self, word, wait):
        """Type ``word`` into the translator page and return the ``data-lang`` value.

        Loads the Baidu Translate home page, waits for the input box, sends
        ``word`` to it, then waits for the language element and returns its
        ``data-lang`` attribute (presumably the language the page detected for
        the input — confirm against the page markup). The whole call is
        limited to 90 seconds by the decorator.

        Args:
            word: Text to type into the input box.
            wait: ``WebDriverWait`` used for the presence checks.
        """
        input_locator = (By.ID, "baidu_translate_input")
        lang_locator = (By.XPATH, '//*[@id="main-outer"]/div/div/div[1]/div[1]/div[1]/a[1]/span/span')

        driver = self.browser
        driver.get("https://fanyi.baidu.com/")

        wait.until(EC.presence_of_element_located(input_locator))
        driver.find_element(*input_locator).send_keys(word)

        wait.until(EC.presence_of_element_located(lang_locator))
        return driver.find_element(*lang_locator).get_attribute("data-lang")

    def is_punctuation(self, char):
        punctuation = string.punctuation + '、' + '（' + '…' + '）' + '《' + '》' + '“' + '”' + '：' + '；' + '！' + '　' + '。'

        return char in punctuation

    def sentence_split_sentence(self, contentWithTag):
        pattern = re.compile(r'[^\n]+(?=\n)|[^\n]+$')
        match_group = pattern.finditer(contentWithTag)
        sentences = []
        if match_group:
            for _ in match_group:
                start_end_index = _.span()
                sentences.append((start_end_index[0], start_end_index[1], _.group()))
        if (not sentences) and (len(contentWithTag) >= 4):
            sentences.append((0, len(contentWithTag), contentWithTag))
        return sentences

    def jionstr(self, html):
        paragraphs = []
        current_sentence = ''
        for tag in html.find_all(text=True):
            sentence = str(tag)
            if sentence == '\n' or sentence == '\t' or sentence == ' ':
                continue
            if self.is_punctuation(sentence):
                continue
            if sentence.startswith('https://') or sentence.startswith('http://') or sentence.startswith('www.'):
                continue
            # 检查拼接后的句子长度是否超过1000字
            if len(current_sentence) + len(sentence) <= 1000:
                current_sentence += sentence
            else:
                paragraphs.append(current_sentence.strip())
                current_sentence = sentence
        return paragraphs

    def gethtml(self, contentWithTag):
        """Translate the text nodes of an HTML fragment and return the resulting HTML.

        If ``baseCore.detect_language`` reports ``'zh'`` for the document text,
        the input is returned unchanged. Otherwise every non-blank text node
        that ``is_website_link`` does not accept is stripped and joined with a
        ``😊`` delimiter. The joined text is sent to ``translate`` in pieces of
        at most 1000 characters, and the translated output is split on the
        delimiter and written back into the nodes in order.

        Differences from a naive implementation, all for robustness:
        a document with no translatable text triggers no translation request;
        a piece for which ``translate`` returns ``None`` is kept untranslated;
        and when the translator returns fewer delimited segments than there are
        nodes, the surplus nodes keep their original text instead of raising
        ``IndexError``.

        Args:
            contentWithTag: HTML markup as a string.

        Returns:
            The prettified translated HTML followed by an attribution
            paragraph, or ``contentWithTag`` itself for Chinese input.
        """
        delimiter = '😊'
        limit = 1000

        html = BeautifulSoup(contentWithTag, 'html.parser')
        lang = baseCore.detect_language(html.text)
        if lang == 'zh':
            return contentWithTag

        # Collect the nodes once so joining and replacing use the same sequence.
        nodes = [
            node for node in html.find_all(text=True)
            if node.strip() != '' and not self.is_website_link(str(node).strip())
        ]
        pending = ''.join(f'{str(node).strip()}{delimiter}' for node in nodes)

        translated_parts = []
        while pending:
            remainder = pending.strip()
            if len(remainder) == 1 and self.is_punctuation(remainder):
                # A lone punctuation mark is passed through untranslated.
                translated_parts.append(pending)
                break
            if len(pending) > limit:
                cut = self._find_chunk_end(pending, limit)
                chunk = pending[:cut].strip()
                pending = pending[cut:]
                last_chunk = False
            else:
                chunk = pending
                last_chunk = True
            translated = self.translate(chunk, lang)
            # translate() may return None; keep the source text (and its
            # delimiters) so the node alignment below is preserved.
            translated_parts.append(chunk if translated is None else translated)
            if last_chunk:
                # Pause after the final request (kept from the original;
                # presumably rate limiting).
                time.sleep(2)
                break

        segments = ''.join(translated_parts).split(delimiter)
        for index, node in enumerate(nodes):
            if index >= len(segments):
                # Delimiters were lost in translation; leave the rest untouched.
                break
            node.replace_with(segments[index])

        # NOTE(review): the attribution text names Microsoft's translator although
        # this class drives fanyi.baidu.com, and "<p/>" looks like a typo for
        # "<p>". Left byte-identical because consumers may depend on it.
        return str(html.prettify()) + '<p/><br>译文来源：微软自动翻译<br></p>'

    def _find_chunk_end(self, text, limit):
        """Return the index at which to cut ``text`` so the piece is at most ``limit`` long.

        Searches backwards from ``limit - 1`` down to index 1 for a full stop
        or comma (ASCII or full-width) and cuts just after it; falls back to
        ``limit`` when none is found.
        """
        for position in range(limit - 1, 0, -1):
            if text[position] in ('.', '。', ',', '，'):
                return position + 1
        return limit


# if __name__ == "__main__":
#     test = Translate()
#     db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017/', username='admin', password='ZZsn@9988').中科软[
#         '数据源_0504']
#     data = db_storage.find_one({'_id': ObjectId('656f14e84d6d77428c713271')})
#     a = data['richTextForeign']
#     result = test.gethtml(a)
#     print(result)
#     test.close()