# -*- coding: utf-8 -*-
# @Time : 2023/3/7 17:29
# @Author : ctt
# @File : copy_table
# @Project : 表格复制
from copy import deepcopy
from docx import Document

"""
prep_p = p.insert_paragraph_before("段落前插入内容")
document.add_page_break()       # 插入分页符
"""
import re
import json
import pandas as pd
from docx import Document
from docx.document import Document as _Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.table import _Cell, Table, _Row
from docx.text.paragraph import Paragraph
from docx.enum.section import WD_SECTION_START
from docx.enum.text import WD_BREAK
import docx
from docx.shared import Pt
# from docx.enum.text import WD_PARAGRAPH_ALIGNMENT


def iter_block_items(parent):
    """
    Yield the paragraphs and tables that are direct children of *parent*,
    in document order.

    Each yielded value is a ``Paragraph`` or a ``Table`` proxy bound to
    *parent*. Children of any other XML type are skipped.

    :param parent: a ``_Document`` (its body is walked), a ``_Cell`` (its
        ``_tc`` element is walked) or a ``_Row`` (its ``_tr`` element is
        walked).
    :raises ValueError: if *parent* is none of the supported types. Because
        this is a generator, the error surfaces on the first iteration step,
        not at call time.
    """
    # NOTE(review): a row's direct children are presumably cells rather than
    # paragraphs/tables, so the _Row case may yield nothing - confirm.
    container_lookup = (
        (_Document, lambda owner: owner.element.body),
        (_Cell, lambda owner: owner._tc),
        (_Row, lambda owner: owner._tr),
    )
    for owner_type, locate in container_lookup:
        if isinstance(parent, owner_type):
            container = locate(parent)
            break
    else:
        raise ValueError("something's not right")

    for node in container.iterchildren():
        if isinstance(node, CT_P):
            yield Paragraph(node, parent)
            continue
        if isinstance(node, CT_Tbl):
            yield Table(node, parent)


def parase_table(table):
    """
    Convert a table into a DataFrame holding the stripped text of each cell.

    Row *r* of the table becomes row *r* of the result and cell *c* becomes
    column *c*; both axes are labelled 0..n-1. Rows with fewer cells than the
    widest row are padded with NaN by the concatenation.

    :param table: object exposing ``rows``; each row exposes ``cells`` and
        each cell exposes a ``text`` string (e.g. a python-docx ``Table``).
    :return: ``pandas.DataFrame`` of cell texts, or an empty DataFrame when
        the table has no rows.
    """
    # One single-column frame per table row; they are joined side by side
    # once and then transposed, instead of re-concatenating (and re-copying)
    # the accumulated frame for every row.
    row_frames = [
        pd.DataFrame([cell.text.strip() for cell in row.cells])
        for row in table.rows
    ]
    if not row_frames:
        # pd.concat refuses an empty list; an empty table maps to an empty frame.
        return pd.DataFrame()
    return pd.concat(row_frames, axis=1, ignore_index=True).T


def get_table_position(para):
    """
    Extract the placeholder names written as ``{{name}}`` in a paragraph.

    :param para: object exposing the paragraph text as ``para.text``.
    :return: list with the text found between each ``{{`` and the next
        ``}}``, or ``False`` when the paragraph contains no placeholder.
    """
    placeholders = re.findall(r'(?<={{).*?(?=}})', para.text)
    return placeholders if placeholders else False


def get_choose_table(document, table_names: list):
    """
    Collect copies of the tables in *document* that belong to *table_names*.

    The document's paragraphs and tables are visited in order. A table is
    picked up in one of two ways:

    * The paragraph visited most recently contains one of the names: the
      copy is appended to the list of the first name found in that paragraph.
      Every further table that follows before the next paragraph goes to the
      same list.
    * Otherwise, the stripped text of the table's first cell equals one of
      the names: the copy is stored under a new key ``<name>续表<i>``, where
      ``i`` is a counter that starts at 1 and is shared by all names.

    Names are matched literally. They are tried in the order given, so when
    one name is a prefix of another the longer one should be listed first.

    :param document: document to scan; passed to ``iter_block_items``.
    :param table_names: names of the tables to extract.
    :return: dict mapping each name to a list of copied table XML elements,
        e.g. ``{'name': [table, continuation table]}``. Empty dict when
        *table_names* is empty.
    """
    if not table_names:
        # An empty alternation would match every paragraph with '' and then
        # fail on the dict lookup; nothing can be selected anyway.
        return {}

    table_names_data = {key: [] for key in table_names}
    # Escape the names so characters such as '.' or '(' are taken literally;
    # this guarantees every match is exactly one of the dict keys.
    dw_pattern = re.compile('|'.join(re.escape(name) for name in table_names))
    dw = []  # names found in the latest paragraph; empty until one is seen
    i = 1
    for block in iter_block_items(document):
        if isinstance(block, Paragraph):
            dw = dw_pattern.findall(block.text)
            continue
        if not isinstance(block, Table):
            continue

        if dw:
            # Table located through the preceding paragraph. Only the XML
            # element is stored, so only the element is copied.
            table_names_data[dw[0]].append(deepcopy(block._element))
            continue

        # Table located through its own header cell.
        table_df = parase_table(block)
        if table_df.empty:
            continue
        header = table_df.iat[0, 0]
        if header in table_names:
            table_names_data[header + "续表" + str(i)] = [deepcopy(block._element)]
            i += 1
    return table_names_data


def new_document():
    """
    Build a brand-new document holding a single empty paragraph.

    :return: the XML element (``_p``) of that empty paragraph.
    """
    blank_paragraph = Document().add_paragraph()
    return blank_paragraph._p


def generate_report(table_names_data, template_document, tables_dict):
    """
    Replace the ``{{...table...}}`` placeholder paragraphs of a template with
    the extracted tables, then drop the template's continuation-table markers.

    For every body paragraph whose first ``{{...}}`` placeholder contains the
    text ``table``, the table elements mapped to that placeholder are inserted
    right after the paragraph, in list order, and the paragraph itself is
    removed. Afterwards every paragraph whose text contains ``续表`` followed
    by a digit is removed.

    The table elements are inserted directly (not copied) and
    *template_document* is modified in place.

    :param table_names_data: dict mapping table name to a list of table XML
        elements.
    :param template_document: document containing the placeholders.
    :param tables_dict: dict mapping placeholder name to table name.
    :return: the modified *template_document*.
    :raises KeyError: if a placeholder is missing from *tables_dict* or its
        table name is missing from *table_names_data*.
    """
    document = template_document
    pattern = re.compile(r'(?<={{).*?(?=}})')
    # document.paragraphs is a snapshot list, so removing paragraphs inside
    # the loop does not disturb the iteration.
    for block in document.paragraphs:
        match = pattern.findall(block.text)
        if not (match and "table" in match[0]):
            continue
        # Chain the insertions: each table goes after the previously inserted
        # one. Inserting all of them directly after the placeholder would
        # leave them in reverse order.
        anchor = block._p
        for table_element in table_names_data[tables_dict[match[0]]]:
            anchor.addnext(table_element)
            anchor = table_element
        # Remove the placeholder paragraph and invalidate its proxy.
        p = block._element
        p.getparent().remove(p)
        block._p = block._element = None

    # Remove the continuation-table markers defined in the template.
    pattern_clear = re.compile(r'(?<=续表)[0-9]')
    # Materialise the block list first so the body is not mutated while it is
    # still being walked lazily.
    for block in list(iter_block_items(document)):
        if isinstance(block, Paragraph) and pattern_clear.search(block.text):
            p = block._element
            p.getparent().remove(p)
            block._p = block._element = None
    return document


def new_generate_report(table_names_data, template_document, tables_dict):
    """
    Replace the ``{{...table...}}`` placeholder paragraphs of a template with
    the extracted tables, writing ``无`` where no table was extracted.

    For every body paragraph whose first ``{{...}}`` placeholder contains the
    text ``table``:

    * if tables are mapped to the placeholder, they are inserted right after
      the paragraph, in list order;
    * otherwise a paragraph with the text ``无`` (first-line indent 24 pt,
      font size 15 pt) is inserted after it.

    The placeholder paragraph is then removed. Afterwards every paragraph
    whose text contains ``续表`` followed by a digit is removed.

    The table elements are inserted directly (not copied) and
    *template_document* is modified in place.

    :param table_names_data: dict mapping table name to a list of table XML
        elements.
    :param template_document: document containing the placeholders.
    :param tables_dict: dict mapping placeholder name to table name.
    :return: the modified *template_document*.
    :raises KeyError: if a placeholder is missing from *tables_dict* or its
        table name is missing from *table_names_data*.
    """
    document = template_document
    pattern = re.compile(r'(?<={{).*?(?=}})')
    # document.paragraphs is a snapshot list, so paragraphs added or removed
    # inside the loop do not disturb the iteration.
    for block in document.paragraphs:
        match = pattern.findall(block.text)
        if not (match and "table" in match[0]):
            continue

        table_data = table_names_data[tables_dict[match[0]]]
        if table_data:
            # Chain the insertions: each table goes after the previously
            # inserted one. Inserting all of them directly after the
            # placeholder would leave them in reverse order.
            anchor = block._p
            for table_element in table_data:
                anchor.addnext(table_element)
                anchor = table_element
        else:
            # No table available: create the fallback paragraph (it is first
            # appended to the document) and move it behind the placeholder.
            new_p = document.add_paragraph()
            new_p.paragraph_format.first_line_indent = Pt(24)
            new_r = new_p.add_run('无')
            new_r.font.size = Pt(15)
            block._p.addnext(new_p._p)

        # Remove the placeholder paragraph and invalidate its proxy.
        p = block._element
        p.getparent().remove(p)
        block._p = block._element = None

    # Remove the continuation-table markers defined in the template.
    pattern_clear = re.compile(r'(?<=续表)[0-9]')
    # Materialise the block list first so the body is not mutated while it is
    # still being walked lazily.
    for block in list(iter_block_items(document)):
        if isinstance(block, Paragraph) and pattern_clear.search(block.text):
            p = block._element
            p.getparent().remove(p)
            block._p = block._element = None
    return document


if __name__ == '__main__':
    import datetime

    start_time = datetime.datetime.now()

    # Script inputs: the placeholder -> table-name mapping, the document the
    # tables are copied from, and the template they are copied into.
    # The mapping's value order is the order in which get_choose_table joins
    # the names into its search pattern, so it is kept as is.
    tables_dict = {
        "table5": "收入费用表（2）",
        "table4": "收入费用表（1）",
        "table3": "资产负债表续表2",
        "table2": "资产负债表续表1",
        "table1": "资产负债表",
        # Disabled entries:
        # "table9": "（17）其他应付款明细信息如下：",
        # "table8": "（9）无形资产明细信息如下：",
        # "table10": "（24）其他收入明细信息如下：",
        "table7": "2.本年预算结余与盈余调节表",
        # "table11": "（25）业务活动费用明细信息如下：",
        "table6": "1.应付工程款情况表",
        "table13": "以名义金额计量的资产名称、数量等情况，以及以名义金额计量理由的说明",
    }
    # Alternative mapping kept for reference:
    # tables_dict = {'table1': '资产负债表', 'table2': '资产负债表续表1', 'table3': '资产负债表续表2', 'table4': '收入费用表（1）', 'table5': '收入费用表（2）', 'table6': '（1）货币资金明细信息如下：',
    #                "table7": "（7）固定资产明细信息如下：", "table8": "（9）无形资产明细信息如下：", "table9": "（17）其他应付款明细信息如下：", "table10": "（24）其他收入明细信息如下：",
    #                "table11": "（25）业务活动费用明细信息如下：", "table12": "（28）商品和服务费用明细信息如下：", }

    source_document = Document(r'data/待复制的表格文件.docx')
    template_document = Document(r'data/模板.docx')

    # Extract the requested tables, show them, then fill the template.
    extracted_tables = get_choose_table(source_document, list(tables_dict.values()))
    print(extracted_tables)
    new_generate_report(extracted_tables, template_document, tables_dict)
    # Earlier invocation kept for reference:
    # generate_report(data_result, save_path=r'data/报告文件.docx', template_path=r'data/new_财务报告模板.docx', tables_dict=tables_dict)

