#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File     : regular_extract
# @Author   : LiuYan
# @Time     : 2021/12/7 18:01

import re


class RegularExtract:
    """Regex helpers that extract matched substrings or their positions from text."""

    def match_pattern(self, para_text: str, patterns: list) -> set:
        """
        Collect every substring of ``para_text`` matched by any of the patterns.

        :param para_text:   text to search
        :param patterns:    list of regular-expression strings
        :return:            set of matched substrings, each with leading and
                            trailing whitespace stripped (duplicates collapse)
        """
        return {
            hit.group().strip()
            for expr in patterns
            # Concatenating with '' means a non-str pattern raises TypeError
            # before it ever reaches the regex compiler.
            for hit in re.compile(r'' + expr).finditer(para_text)
        }

    def match_index(self, para_text: str, pattern_str: str) -> list:
        """
        Locate every match of ``pattern_str`` in ``para_text``.

        :param para_text:   text to search
        :param pattern_str: regular-expression string
        :return:            list of ``{'start_index': int, 'end_index': int}``
                            dicts, one per match, in the order the matches
                            occur; ``end_index`` is exclusive
        """
        compiled = re.compile(r'' + pattern_str)
        return [
            {
                'start_index': hit.start(),
                'end_index': hit.end()
            }
            for hit in compiled.finditer(para_text)
        ]
