/**
 * 
 */
package com.zzsn.util;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Hashtable;
import java.util.Locale;

/**
 * Static lookup of inverse-document-frequency (IDF) values and stop words
 * for English and Chinese terms.
 *
 * <p>The four backing tables (English/Chinese stop words, English/Chinese
 * IDF values) are read lazily from files under {@code data/} the first time
 * they are needed and are cached in static fields afterwards. A table is
 * cached only after its file has been read completely, so a failed load is
 * retried on the next call instead of leaving an empty or partial table
 * behind.
 *
 * <p>Lazy loading is not explicitly synchronized by this class.
 *
 * @author ear
 * ver: 2013.09.20
 */
public class IDFHash {

	// Previously used model files: data/idf.hash.eng, data/idf.hash.chi,
	// data/domain.txt.en.idf, data/domain.txt.cn.idf.

	/** English IDF table; each useful line is "word&lt;TAB&gt;idf". */
	private static final String ENG_IDF_PATH = "data/DF_en_5_sort.txt.idf";
	/** Chinese IDF table; each useful line is "word&lt;TAB&gt;idf". */
	private static final String CHI_IDF_PATH = "data/DF_cn_5_sort.txt.idf";
	/** English stop-word list, one word per line. */
	private static final String ENG_STOP_PATH = "data/stope";
	/** Chinese stop-word list, one word per line. */
	private static final String CHI_STOP_PATH = "data/stopc";

	/** IDF value returned for any stop word. */
	private static final double STOP_WORD_IDF = 0.000001;
	/** IDF value returned for a word found in neither table. */
	private static final double UNKNOWN_WORD_IDF = 6.0;

	// Each cache stays null until its file has been loaded successfully.
	private static Hashtable<String, Double> engIDFHash = null;
	private static Hashtable<String, Double> chiIDFHash = null;
	private static Hashtable<String, Integer> engStopHash = null;
	private static Hashtable<String, Integer> chiStopHash = null;

	/**
	 * Returns the IDF value of a Chinese word.
	 *
	 * @param word the word to look up, matched exactly as given; a
	 *             {@code null} word is treated as unknown
	 * @return {@code -1.0} if the model files could not be loaded, the
	 *         stop-word IDF if the word is a Chinese stop word, the stored
	 *         value if the word is in the Chinese IDF table, otherwise the
	 *         unknown-word IDF
	 */
	public static double ChiWordIDF(String word) {
		if (!LoadModels()) {
			return -1.0;
		}
		return lookupIDF(word, chiStopHash, chiIDFHash);
	}

	/**
	 * Returns the IDF value of an English word. The word is lower-cased
	 * before the lookup.
	 *
	 * @param word the word to look up; a {@code null} word is treated as
	 *             unknown
	 * @return {@code -1.0} if the model files could not be loaded, the
	 *         stop-word IDF if the word is an English stop word, the stored
	 *         value if the word is in the English IDF table, otherwise the
	 *         unknown-word IDF
	 */
	public static double EngWordIDF(String word) {
		if (!LoadModels()) {
			return -1.0;
		}
		// Locale.ROOT keeps the case mapping independent of the JVM's
		// default locale (e.g. Turkish dotless i).
		String key = (word == null) ? null : word.toLowerCase(Locale.ROOT);
		return lookupIDF(key, engStopHash, engIDFHash);
	}

	/**
	 * Tells whether a word is in the English stop-word list. Only the
	 * English stop-word file is loaded (if it has not been already); the
	 * word is lower-cased before the lookup.
	 *
	 * @param word the word to test
	 * @return {@code true} if the word is a stop word; {@code false} if it
	 *         is not, if it is {@code null}, or if the stop-word file could
	 *         not be read
	 */
	public static boolean IsEngStopWord(String word) {
		if (word == null) {
			return false;
		}
		try {
			if (engStopHash == null) {
				engStopHash = readStopWords(ENG_STOP_PATH);
			}
			return engStopHash.containsKey(word.toLowerCase(Locale.ROOT));
		} catch (IOException e) {
			e.printStackTrace();
			return false;
		}
	}

	/**
	 * Tells whether a word is in the Chinese stop-word list. Only the
	 * Chinese stop-word file is loaded (if it has not been already).
	 *
	 * @param word the word to test, matched exactly as given
	 * @return {@code true} if the word is a stop word; {@code false} if it
	 *         is not, if it is {@code null}, or if the stop-word file could
	 *         not be read
	 */
	public static boolean IsChiStopWord(String word) {
		if (word == null) {
			return false;
		}
		try {
			if (chiStopHash == null) {
				chiStopHash = readStopWords(CHI_STOP_PATH);
			}
			return chiStopHash.containsKey(word);
		} catch (IOException e) {
			e.printStackTrace();
			return false;
		}
	}

	/**
	 * Makes sure all four tables are loaded, reading any that are still
	 * missing.
	 *
	 * @return {@code true} if every table is available; {@code false} if a
	 *         file could not be read or an IDF value could not be parsed.
	 *         Tables loaded before the failure stay cached; the failed one
	 *         stays {@code null} and is retried on the next call.
	 */
	private static boolean LoadModels() {
		try {
			if (engStopHash == null) {
				engStopHash = readStopWords(ENG_STOP_PATH);
			}
			if (chiStopHash == null) {
				chiStopHash = readStopWords(CHI_STOP_PATH);
			}
			if (engIDFHash == null) {
				engIDFHash = readIDFTable(ENG_IDF_PATH);
			}
			if (chiIDFHash == null) {
				chiIDFHash = readIDFTable(CHI_IDF_PATH);
			}
			return true;
		} catch (IOException e) {
			e.printStackTrace();
			return false;
		} catch (NumberFormatException e) {
			e.printStackTrace();
			return false;
		}
	}

	/** Resolves a (possibly null) word against a stop list and an IDF table. */
	private static double lookupIDF(String word,
			Hashtable<String, Integer> stopWords,
			Hashtable<String, Double> idfTable) {
		if (word == null) {
			// Hashtable rejects null keys; a null word is simply unknown.
			return UNKNOWN_WORD_IDF;
		}
		if (stopWords.containsKey(word)) {
			return STOP_WORD_IDF;
		}
		Double idf = idfTable.get(word);
		return (idf != null) ? idf.doubleValue() : UNKNOWN_WORD_IDF;
	}

	/** Reads a stop-word file: one trimmed, non-blank word per line. */
	private static Hashtable<String, Integer> readStopWords(String path)
			throws IOException {
		Hashtable<String, Integer> table = new Hashtable<String, Integer>();
		BufferedReader in = openReader(path);
		try {
			String line;
			while ((line = in.readLine()) != null) {
				String stopWord = line.trim();
				if (stopWord.length() == 0) {
					continue;
				}
				table.put(stopWord, 1);
			}
		} finally {
			// Closing the reader also closes the wrapped streams.
			in.close();
		}
		return table;
	}

	/**
	 * Reads an IDF file. Blank lines and lines that do not consist of
	 * exactly two tab-separated fields are skipped; an unparsable number
	 * aborts the load with a NumberFormatException.
	 */
	private static Hashtable<String, Double> readIDFTable(String path)
			throws IOException {
		Hashtable<String, Double> table = new Hashtable<String, Double>();
		BufferedReader in = openReader(path);
		try {
			String line;
			while ((line = in.readLine()) != null) {
				if (line.trim().length() == 0) {
					continue;
				}
				String[] fields = line.split("\t");
				if (fields.length == 2) {
					table.put(fields[0].trim(),
							Double.parseDouble(fields[1].trim()));
				}
			}
		} finally {
			in.close();
		}
		return table;
	}

	/** Opens a buffered text reader over the given file. */
	private static BufferedReader openReader(String path) throws IOException {
		FileInputStream fis = new FileInputStream(path);
		// NOTE(review): the file is decoded with the platform default
		// charset. The encoding of the data files is not visible here;
		// confirm it and pass an explicit charset if it is fixed.
		return new BufferedReader(new InputStreamReader(fis));
	}

	/**
	 * Small manual check: prints the IDF of a few sample words followed by
	 * "OK".
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		double idf = IDFHash.EngWordIDF("of");
		System.out.println(idf);
		double idf2 = IDFHash.ChiWordIDF("手册");
		System.out.println(idf2);
		double idf3 = IDFHash.EngWordIDF("apple");
		System.out.println(idf3);
		System.out.println("OK");
	}

	/**
	 * Loads all tables up front. The result of the load is not reported;
	 * failures surface later as {@code -1.0} from the IDF lookups.
	 */
	public static void init() {
		LoadModels();
	}

}
