package com.zzsn.event.util;

import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;

import java.util.Collection;
import java.util.Iterator;
import java.util.TreeMap;

/**
 * Stop-word lookup helpers backed by a HanLP {@link DoubleArrayTrie}.
 *
 * <p>The word list is read once, during class initialization, from
 * {@code hanlp/cloudstopwords.txt} through {@code FileUtil.readFileList}; every
 * entry is stored in the trie as both key and value.
 *
 * @author kongliufeng
 * @create 2020-09-03 16:30
 * @Version 1.0
 */
public class StopWordsUtil {

    /** Location of the stop-word list handed to {@code FileUtil.readFileList}. */
    private static final String STOP_WORDS_PATH = "hanlp/cloudstopwords.txt";

    /** Trie holding every stop word; built once in the static initializer and never reassigned. */
    private static final DoubleArrayTrie<String> tree;

    static {
        // TreeMap keeps the keys sorted and de-duplicated before they are handed to the trie.
        TreeMap<String, String> map = new TreeMap<>();
        for (String word : FileUtil.readFileList(STOP_WORDS_PATH)) {
            // TreeMap (natural ordering) rejects null keys, so skip them instead of
            // failing class initialization.
            if (word != null) {
                map.put(word, word);
            }
        }
        tree = new DoubleArrayTrie<>(map);
    }

    /**
     * Tells whether a word is a stop word.
     *
     * @param word the word to look up; may be {@code null}
     * @return {@code true} if {@code word} is a key of the stop-word trie;
     *         {@code false} otherwise, including when {@code word} is {@code null}
     * @author kongliufeng
     * @Version 1.0
     */
    public static Boolean isStopWord(String word) {
        return word != null && tree.containsKey(word);
    }

    /**
     * Removes, in place, every stop word and every occurrence of {@code excludeWord}
     * from the given collection.
     *
     * <p>Removal goes through {@link Iterator#remove()}, so the collection's iterator
     * must support it. {@code null} elements are left in the collection.
     *
     * @param collection  the collection to filter; nothing happens when {@code null}
     * @param excludeWord an extra word to remove in addition to the stop words;
     *                    {@code null} means no extra word
     * @author kongliufeng
     * @Version 1.0
     */
    public static void removeStopWords(Collection<String> collection, String excludeWord) {
        if (collection == null) {
            return;
        }
        Iterator<String> iterator = collection.iterator();
        while (iterator.hasNext()) {
            String next = iterator.next();
            if (next == null) {
                // A null element can be neither the excluded word nor a trie key.
                continue;
            }
            // Both conditions are folded into one test so that remove() is called at
            // most once per next(); a second call would throw IllegalStateException
            // when the excluded word is itself a stop word.
            if (next.equals(excludeWord) || tree.containsKey(next)) {
                iterator.remove();
            }
        }
    }

    /**
     * Removes, in place, every stop word from the given collection.
     *
     * @param collection the collection to filter; nothing happens when {@code null}
     */
    public static void removeStopWords(Collection<String> collection) {
        removeStopWords(collection, null);
    }


}
