最后说一下双Hash结构的实现类DoubleHashDictionary类:
java 代码
-
-
-
-
-
-
- package edu.stu.cn.segment.matching.dictionary;
-
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.io.IOException;
- import java.io.PrintStream;
- import java.io.Serializable;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Hashtable;
- import java.util.LinkedList;
-
-
-
-
- public class DoubleHashDictionary implements Serializable, DictionaryImpl
- {
-
-
-
- private static final long serialVersionUID = -6085097706592874294L;
-
-
-
-
- private Hashtablenull;
-
-
-
-
- private int maxWordLen = 0;
-
-
-
-
- private int wordCount = 0;
-
-
-
-
-
-
-
- public void deleteWord(String word)
- {
- if (word == null)
- return;
-
-
- word = word.trim();
-
- int len = word.length();
-
- if (this.indexTable[len - 1] == null)
- return;
-
- String fch = word.substring(0, 1);
-
- ArrayList<string> wal = null; </string>
- if (this.indexTable[len - 1].containsKey(fch))
- wal = this.indexTable[len - 1].get(fch);
- else
- return;
-
-
- String str = word.substring(1, len);
- if (Collections.binarySearch(wal, str) >= 0)
- {
- wal.remove(str);
- this.indexTable[len - 1].put(fch, wal);
- }
- else
- return;
- }
-
-
-
-
- public int getMaxWordLen()
- {
- return maxWordLen;
- }
-
-
-
-
- public int getWordCount()
- {
- return wordCount;
- }
-
-
-
-
-
-
-
- public void insertWord(String word)
- {
- if (word == null)
- return;
-
-
- word = word.trim();
-
- int len = word.length();
-
- if (this.indexTable[len - 1] == null)
- this.indexTable[len - 1] = new Hashtable
-
- String fch = word.substring(0, 1);
-
- ArrayList<string> wal = null; </string>
- if (this.indexTable[len - 1].containsKey(fch))
- wal = this.indexTable[len - 1].get(fch);
- else
- wal = new ArrayList<string>(); </string>
-
-
- String str = word.substring(1, len);
-
- if (Collections.binarySearch(wal, str) < 0)
- wal.add(str);
-
- Collections.sort(wal);
- this.indexTable[len - 1].put(fch, wal);
- }
-
-
-
-
-
-
-
- @SuppressWarnings("unchecked")
- public void loadDictionary(String fileName)
- {
- try
- {
-
- BufferedReader in = new BufferedReader(new FileReader(fileName));
- String word = null;
-
- LinkedList<string> wordLink = new LinkedList<string>(); </string></string>
-
- this.maxWordLen = 0;
-
-
- while ((word = in.readLine()) != null)
- {
- if (word.length() > this.maxWordLen)
- this.maxWordLen = word.length();
- wordLink.add(word);
- this.wordCount++;
- }
-
-
- this.indexTable = new Hashtable[this.maxWordLen];
-
- for (String w : wordLink)
- {
-
- this.insertWord(w);
- }
-
- wordLink.clear();
- }
- catch (IOException e)
- {
-
- e.printStackTrace();
- }
- }
-
-
-
-
-
-
-
-
- public boolean match(String word)
- {
- if (word == null)
- return false;
-
-
- int len = word.length();
-
-
- if (len > this.maxWordLen)
- return false;
-
-
- if (this.indexTable[len - 1] == null)
- return false;
-
-
- String fch = word.substring(0, 1);
- if (this.indexTable[len - 1].containsKey(fch))
- {
- if (len == 1)
- return true;
- else
- {
-
- ArrayList<string> wal = this.indexTable[len - 1].get(fch); </string>
-
- if (Collections.binarySearch(wal, word.substring(1, len)) < 0)
- return false;
- else
- return true;
- }
- }
- else
- return false;
- }
-
-
-
-
-
-
-
- public void print(PrintStream out)
- {
- for (int i = 0; i < this.indexTable.length; i++)
- {
- out.println("词长:" + (i + 1));
-
- if (this.indexTable[i] != null)
- {
- for (String fch : this.indexTable[i].keySet())
- {
- out.println("首字:" + fch);
- for (String w : this.indexTable[i].get(fch))
- out.println("\t" + w);
- }
- }
- }
- out.flush();
- }
- }
为什么说是双Hash结构呢?因为在查询词汇时,先使用词汇的长度length作为第一次Hash的key取出Hashtable结构的value,接下来也就跟首字Hash查询的操作一样了:取首字作为key取出一维线性表的value后采用折半查找。当词典中词汇数目很大时,一维线性表过长,进行折半查找无疑会提高比较的次数从而降低了效率。而使用双Hash正是希望通过增加多一次Hash求值从而将长的词汇表剪短成为多段较短的一维线性表减低折半查找时的比较次数。
既然说道了序列化,当然少不了序列化操作类DictionaryUtil:
java 代码
-
-
-
-
-
-
- package edu.stu.cn.segment.matching.util;
-
- import java.io.BufferedInputStream;
- import java.io.BufferedOutputStream;
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.ObjectInputStream;
- import java.io.ObjectOutputStream;
-
- import edu.stu.cn.segment.matching.dictionary.DictionaryImpl;
-
-
-
-
-
- public class DictionaryUtilextends DictionaryImpl>
- {
-
-
-
-
-
-
-
- @SuppressWarnings("unchecked")
- public T readDictionary(String fileName)
- {
- try
- {
- ObjectInputStream in = new ObjectInputStream(
- new BufferedInputStream(new FileInputStream(fileName)));
- T dic = (T) in.readObject();
- in.close();
- return dic;
- }
- catch (Exception e)
- {
- System.err.println(e.getMessage());
- return null;
- }
- }
-
-
-
-
-
-
-
-
-
-
- public boolean writeDictionary(T dic, String fileName)
- {
- try
- {
- ObjectOutputStream out = new ObjectOutputStream(
- new BufferedOutputStream(new FileOutputStream(fileName)));
- out.writeObject(dic);
- out.flush();
- out.close();
- return true;
- }
- catch (IOException e)
- {
- System.err.println(e.getMessage());
- return false;
- }
-
- }
- }
通过这个操作类可以把实现DictionaryImpl接口的词典实现类写入文件或者从文件中读出。