# -*- coding: utf-8 -*-
# @Time : 2023/3/7 17:29
# @Author : ctt
# @File : copy_table
# @Project : 表格复制
from copy import deepcopy
from docx import Document

"""
prep_p = p.insert_paragraph_before("段落前插入内容")
document.add_page_break()       # 插入分页符
"""
import re
import json
import pandas as pd
from docx import Document
from docx.document import Document as _Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.table import _Cell, Table, _Row
from docx.text.paragraph import Paragraph
from docx.shared import Pt
# 定义待复制内容的匹配模式
start_pattern = re.compile(r'(?<=[0-9][\.．]会计报表重要项目的明细信息及说明)$|(?<=[0-9][\.．]会计报表重要项目的明细信息及说明。)$')
end_pattern = re.compile(r'(?<=[0-9][\.．]需要说明的其他事项)$|(?<=[0-9][\.．]需要说明的其他事项。)（略）$|(?<=[0-9][\.．]需要说明的其他事项[。\.])$')

# start_pattern1 = re.compile(r'(?<=[0-9][\.．])附件：补充报表$|(?<=[0-9][\.．])附件：$')


def iter_block_items(parent):
    """
    Yield the paragraph and table children of *parent* in document order.

    *parent* may be a Document, a _Cell or a _Row. Every CT_P child is
    yielded wrapped in a Paragraph and every CT_Tbl child wrapped in a
    Table; children of any other kind are skipped.

    Raises ValueError (on first iteration, since this is a generator) when
    *parent* is none of the supported container types.
    """
    # (container type, how to reach the XML element that holds its children)
    container_lookup = (
        (_Document, lambda owner: owner.element.body),
        (_Cell, lambda owner: owner._tc),
        (_Row, lambda owner: owner._tr),
    )
    for container_type, get_container in container_lookup:
        if isinstance(parent, container_type):
            container = get_container(parent)
            break
    else:
        raise ValueError("something's not right")

    # (XML element class, proxy class used to wrap it)
    proxy_lookup = ((CT_P, Paragraph), (CT_Tbl, Table))
    for node in container.iterchildren():
        for xml_type, proxy_type in proxy_lookup:
            if isinstance(node, xml_type):
                yield proxy_type(node, parent)
                break


# todo: 先复制内容到模板中，保存更新后的模板
def copy_content_main(doc_path: str, temp_path: str):
    """Copy the matched section of the report file into the template file.

    File-based wrapper around ``new_copy_content_main``: both files are
    opened with ``Document``, the in-memory copy is delegated to that
    function (so the extraction/insertion logic lives in one place only),
    and the updated template is written back to *temp_path*.

    Args:
        doc_path: Path of the document the section is copied from.
        temp_path: Path of the template document. The file is overwritten
            with the updated template.

    Returns:
        None.

    Raises:
        Whatever ``Document``, ``new_copy_content_main`` or ``save`` raise;
        nothing is caught here.
    """
    updated_template = new_copy_content_main(Document(doc_path), Document(temp_path))
    updated_template.save(temp_path)
    return None


# todo: 先复制内容到模板中，保存更新后的模板
def new_copy_content_main(doc_document, template_document):
    """Copy the section between the start and end headings into a template.

    The body of *doc_document* is scanned for the paragraph matching
    ``start_pattern``; every following non-blank paragraph (as plain text)
    and every following table (as a deep copy) is collected until a
    paragraph matching ``end_pattern`` is reached. The collected items are
    then inserted, in their original order, directly after the first
    paragraph of *template_document* that matches ``start_pattern``.

    Inserted paragraphs carry only the text of the source paragraph; they
    are created with the 'Normal' style and formatted as 12pt 宋体 with 12pt
    space before and a 25pt first-line indent.

    Args:
        doc_document: Opened source ``Document``. It is not modified.
        template_document: Opened template ``Document``. It is modified in
            place.

    Returns:
        *template_document* (the same object that was passed in).

    Raises:
        ValueError: If no paragraph in *template_document* matches
            ``start_pattern``, i.e. there is no place to insert the content.
    """
    copied_items = _collect_section_items(doc_document)

    anchor = next(
        (para for para in template_document.paragraphs if start_pattern.search(para.text)),
        None,
    )
    if anchor is None:
        raise ValueError(
            "template document contains no paragraph matching the start heading pattern"
        )

    # Each item is placed immediately after the anchor, so walking the items
    # backwards leaves them in source order once the loop finishes.
    for item in reversed(copied_items):
        if isinstance(item, str):
            # add_paragraph appends at the end of the body; addnext then moves
            # the new element to its real position right after the anchor.
            new_para = template_document.add_paragraph(item, style='Normal')
            _format_inserted_paragraph(new_para)
            anchor._element.addnext(new_para._element)
        else:
            # Table element: already a deep copy owned by nobody else.
            anchor._element.addnext(item)

    return template_document


def _collect_section_items(document):
    """Return the section content as a list of ``str`` (paragraph text) and table elements."""
    items = []
    inside_section = False
    for element in document.element.body.xpath("w:p | w:tbl"):
        if isinstance(element, CT_Tbl):
            if inside_section:
                items.append(deepcopy(element))
        elif isinstance(element, CT_P):
            text = Paragraph(element, document).text
            if start_pattern.search(text):
                # The heading itself is not copied.
                inside_section = True
            elif end_pattern.search(text):
                # The end heading stops the scan even if no start heading was
                # seen before it.
                break
            elif inside_section and text.strip():
                # Whitespace-only paragraphs are dropped.
                items.append(text)
    return items


def _format_inserted_paragraph(paragraph):
    """Apply the body-text font and spacing to a freshly inserted paragraph."""
    from docx.oxml.ns import qn

    # The paragraph was created from non-blank text, so it has exactly the one
    # run that add_paragraph created for that text.
    run = paragraph.runs[0]
    run.font.name = "宋体"
    run.font.size = Pt(12)
    # python-docx's Font.name fills only the ascii/hAnsi font slots; CJK text
    # is rendered with the eastAsia slot, so it has to be set explicitly for
    # Chinese characters to actually appear in 宋体.
    run._element.get_or_add_rPr().get_or_add_rFonts().set(qn('w:eastAsia'), "宋体")
    paragraph.paragraph_format.space_before = Pt(12)
    paragraph.paragraph_format.first_line_indent = Pt(25)


if __name__ == '__main__':
    # Manual run: copy the matched section of the report at doc_path into the
    # template at temp_path. copy_content_main saves its result back to
    # temp_path, so the template file is overwritten in place.
    doc_path = "data/2022年度德阳市旌阳区人民法院(1).docx"
    # doc_path = 'data/特殊教育学校(1).docx'
    # NOTE(review): the template path ends in ".doc"; presumably the file is
    # still in .docx (OOXML) format, since it is opened with Document() - confirm.
    temp_path = "data/财务报告模板(2).doc"
    copy_content_main(doc_path, temp_path)
    # docx_file = r'wKjIbGQeSb6AUq1aAAgAABcLaMw312.docx'
    # doc = Document(docx_file)
    # new_doc = Document()
    # start_found = False
    # end_found = False
    # for element in doc.element.body.xpath("w:p | w:tbl"):
    #     if isinstance(element, CT_P):
    #         para = Paragraph(element, doc)
    #         start_results = re.findall(start_pattern, para.text)
    #         if start_results:
    #             start_found = True
    #             continue
    #
    #     if isinstance(element, CT_P):
    #         para = Paragraph(element, doc)
    #         end_results = re.findall(end_pattern, para.text)
    #         if end_results:
    #             end_found = True
    #             break
    #
    #     # 复制文本段落
    #     if start_found and not end_found and isinstance(element, CT_P):
    #         para = Paragraph(element, doc)
    #         new_paragraph = new_doc.add_paragraph(para.text)
    #
    #     # 复制表格
    #     if start_found and not end_found and isinstance(element, CT_Tbl):
    #         table = Table(element, doc)
    #         new_table = deepcopy(table._element)
    #         # 在目标文档添加一个空段落
    #         new_doc.add_paragraph('')
    #         # 获取新段落
    #         new_paragraph = new_doc.paragraphs[-1]
    #         # 在新段落中添加表格
    #         new_paragraph._element.addprevious(new_table)
    #
    # # 遍历文档中的段落，去除多余的空白段落
    # for para in new_doc.paragraphs:
    #     # 使用正则表达式匹配空白段落（只包含空格和换行符）
    #     if re.match(r'^\s*$', para.text):
    #         # 删除空白段落
    #         new_doc._element.body.remove(para._element)
    #
    # # # 保存目标文档
    # # new_doc.save('new.docx')
    #
    # # 获取文档的所有元素
    # elements = new_doc._element
    #
    # # 获取待插入内容在目标文档中的位置
    #
    # source_doc = Document("data/01系统导出模板.docx")
    # start_index = None
    # for index, para in enumerate(source_doc.paragraphs):
    #     start_result = re.findall(start_pattern, para.text)
    #     if start_result:
    #         start_index = index
    #         break
    #
    # target_paragraph = source_doc.paragraphs[start_index]
    # # 遍历源文档中的所有元素
    # for element in reversed(new_doc.element.body):
    #     # 如果是段落，就在目标段落之后添加
    #     if isinstance(element, CT_P):
    #         # 考虑样式发生变化，对此进行调整
    #         para = Paragraph(element, doc)
    #         # 设置字体和字号
    #         # font = source_doc.styles['Normal'].font
    #         # font.name = '宋体'
    #         # font.size = Pt(12)
    #         new_para = source_doc.add_paragraph(para.text, style='Normal')
    #         font = new_para.runs[0].font
    #         font.name = "宋体"
    #         font.size = Pt(12)
    #         new_para.paragraph_format.space_before = Pt(12)
    #         new_para.paragraph_format.first_line_indent = Pt(25)
    #         source_doc.element.body.insert(source_doc.element.body.index(target_paragraph._element) + 1, new_para._element)
    #
    #     # 如果是表格，也在目标段落之后添加
    #     elif isinstance(element, CT_Tbl):
    #         source_doc.element.body.insert(source_doc.element.body.index(target_paragraph._element) + 1, element)
    #
    # source_doc.save("data/new_01系统导出模板.docx")







    # import datetime
    # start_time = datetime.datetime.now()
    # # 参数：tables_dict、docx_file、save_path、template_path
    # tables_dict = {
    #             "table13": "以名义金额计量的资产名称、数量等情况，以及以名义金额计量理由的说明",
    #             "table5": "收入费用表（2）",
    #             "table4": "收入费用表（1）",
    #             "table3": "资产负债表续表2",
    #             "table2": "资产负债表续表1",
    #             "table1": "资产负债表",
    #             "table9": "（17）其他应付款明细信息如下：",
    #             "table8": "（9）无形资产明细信息如下：",
    #             "table10": "（24）其他收入明细信息如下：",
    #             "table7": "（7）固定资产明细信息如下：",
    #             "table11": "（25）业务活动费用明细信息如下：",
    #             "table6": "（1）货币资金明细信息如下：",
    #             "table12": "（28）商品和服务费用明细信息如下："
    #         }
    # # tables_dict = {'table1': '资产负债表', 'table2': '资产负债表续表1', 'table3': '资产负债表续表2', 'table4': '收入费用表（1）', 'table5': '收入费用表（2）', 'table6': '（1）货币资金明细信息如下：',
    # #                "table7": "（7）固定资产明细信息如下：", "table8": "（9）无形资产明细信息如下：", "table9": "（17）其他应付款明细信息如下：", "table10": "（24）其他收入明细信息如下：",
    # #                "table11": "（25）业务活动费用明细信息如下：", "table12": "（28）商品和服务费用明细信息如下：", }
    # docx_file = r'data/3月23测试半成品.docx'
    # document = Document(docx_file)
    # data_result = get_choose_table(document, list(tables_dict.values()))
    # print(data_result)
    # generate_report(data_result, save_path=r'报告文件.docx', template_path=r'data/3月23测试模板.docx', tables_dict=tables_dict)
    #
