lucene 全文检索数据库

xiuying

浏览: 548814 次
性别:
来自: 福建

最近访客更多访客>>

chenside2002

wxcode

bluecrow_1986

NE0NE0

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

其他

全文检索 lucene Java Apache SQL

lucene 全文检索数据库
我们以前经常需要搜索数据库中的内容，通常使用 like '%关键词%' 形式的 SQL 语句；
但在数据量大并且需要多表查询时，这种方式速度很慢；
改用 lucene2 建立全文索引就可以解决速度问题。
下面用 lucene2 搜索 photo 表的 title、user_name、tag_name、descr 字段的内容；
用一个例题来说明更直观；
此例题能搜索中文分词；
（需要mysql5的jdbc包和lucene2的包）：
1、数据库我用mysql5；建一个photo表；数据库名是test。

photo表有以下几个字段：
-- Photo metadata table read by Photo.loadPhotos; photo_id is the
-- auto-incremented primary key, every other column is nullable.
CREATE TABLE `photo` (
  `photo_id`    INT(11)     NOT NULL AUTO_INCREMENT,
  `title`       VARCHAR(11) DEFAULT NULL,
  `address`     VARCHAR(50) DEFAULT NULL,
  `descr`       TEXT,
  `user_id`     INT(11)     DEFAULT NULL,
  `user_name`   VARCHAR(11) DEFAULT NULL,
  `upload_time` DATE        DEFAULT NULL,
  `tag_name`    VARCHAR(11) DEFAULT NULL,
  PRIMARY KEY (`photo_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=REDUNDANT;
2、java文件有4个：
文件Photo.java是数据库的photo表的操作文件（其他文件通过 com.upolestar.kmpm.po.Photo 引用它，因此应放在 com.upolestar.kmpm.po 包中，并在文件开头加上对应的 package 声明）；
内容如下：

import java.sql.Connection;
import java.util.ArrayList;
import java.util.Date;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * Plain data holder for one row of the {@code photo} table, together with a
 * static loader that reads every row of that table through JDBC.
 */
public class Photo {
    private long photoId;
    private String title;
    private String description;
    private String address;
    private String userName;
    private long userId;
    private String tag;
    private Date date;

    /** @return the value read from the {@code address} column (may be null) */
    public String getAddress() {
        return address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    /** @return the value read from the {@code descr} column (may be null) */
    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the value read from the {@code photo_id} column */
    public long getPhotoId() {
        return photoId;
    }

    public void setPhotoId(long photoId) {
        this.photoId = photoId;
    }

    /** @return the value read from the {@code tag_name} column (may be null) */
    public String getTag() {
        return tag;
    }

    public void setTag(String tag) {
        this.tag = tag;
    }

    /** @return the value read from the {@code title} column (may be null) */
    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the value read from the {@code user_id} column */
    public long getUserId() {
        return userId;
    }

    public void setUserId(long userId) {
        this.userId = userId;
    }

    /** @return the value read from the {@code user_name} column (may be null) */
    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    /** @return the value read from the {@code upload_time} column (may be null) */
    public Date getDate() {
        return date;
    }

    public void setDate(Date date) {
        this.date = date;
    }

    /**
     * Reads every row of the {@code photo} table into an array.
     *
     * <p>A database failure is propagated to the caller rather than being
     * printed and swallowed, so that a partially read table is never returned
     * (and subsequently indexed) as if it were complete.
     *
     * @param con an open JDBC connection; it is not closed by this method
     * @return one {@code Photo} per row, in the order the query returned them;
     *         an empty array when the table has no rows
     * @throws Exception if preparing, executing, reading or closing the query
     *         fails (the underlying {@code SQLException} is rethrown as-is)
     */
    public static Photo[] loadPhotos(Connection con) throws Exception {
        ArrayList<Photo> list = new ArrayList<Photo>();
        String sql = "select photo_id,title,address,descr,user_id,user_name,upload_time,tag_name from photo";

        PreparedStatement pstm = con.prepareStatement(sql);
        try {
            ResultSet rs = pstm.executeQuery();
            try {
                while (rs.next()) {
                    Photo photo = new Photo();
                    photo.setPhotoId(rs.getLong(1));
                    photo.setTitle(rs.getString(2));
                    photo.setAddress(rs.getString(3));
                    photo.setDescription(rs.getString(4));
                    photo.setUserId(rs.getLong(5));
                    photo.setUserName(rs.getString(6));
                    photo.setDate(rs.getTimestamp(7));
                    photo.setTag(rs.getString(8));
                    list.add(photo);
                }
            } finally {
                rs.close();
            }
        } finally {
            // Nested finally: the statement is released even when closing
            // the result set itself throws.
            pstm.close();
        }

        System.out.println("com.upolestar.kmpm.po.Photo.java ========" + list.size());
        return list.toArray(new Photo[list.size()]);
    }
}

文件IndexerFile.java是把数据库的内容备份成索引文件到磁盘中去；
内容如下：
package com.upolestar.kmpm.service;

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

import com.upolestar.kmpm.po.Photo;

/**
 * Writes photo records into a Lucene index stored on disk.
 */
public class IndexerFile {
    /**
     * Indexes every photo in {@code list} into the index directory.
     *
     * <p>The writer is opened with its {@code create} flag set to
     * {@code true} and is always closed, even when adding a document fails,
     * so a failed run does not keep the writer open.
     *
     * @param indexDir path of the directory holding the index
     * @param list photos to index; nullable properties of a photo are simply
     *        left out of its document instead of causing a
     *        {@code NullPointerException}
     * @return the document count reported by the writer after all photos
     *         were added
     * @throws IOException if the index cannot be written
     */
    public static int indexFile(String indexDir, Photo[] list)
            throws IOException {
        IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(),
                true);
        try {
            writer.setUseCompoundFile(false);
            for (int i = 0; i < list.length; i++) {
                writer.addDocument(toDocument(list[i]));
            }
            int numIndexed = writer.docCount();
            writer.optimize();
            return numIndexed;
        } finally {
            writer.close();
        }
    }

    /** Converts one photo into a Lucene document, skipping absent values. */
    private static Document toDocument(Photo photo) {
        Document doc = new Document();
        addField(doc, "photoId", String.valueOf(photo.getPhotoId()),
                Field.Index.NO);
        addField(doc, "title", photo.getTitle(), Field.Index.TOKENIZED);
        addField(doc, "description", photo.getDescription(),
                Field.Index.TOKENIZED);
        addField(doc, "address", photo.getAddress(), Field.Index.NO);
        addField(doc, "userName", photo.getUserName(), Field.Index.TOKENIZED);
        addField(doc, "userId", String.valueOf(photo.getUserId()),
                Field.Index.NO);

        // Unlike the other text fields, an empty tag is not indexed either.
        String tag = photo.getTag();
        if (tag != null && tag.length() > 0) {
            addField(doc, "tag", tag, Field.Index.TOKENIZED);
        }

        if (photo.getDate() != null) {
            // DateFormat.getDateTimeInstance() is the documented replacement
            // for the deprecated Date.toLocaleString(); the text is the same.
            String uploaded = java.text.DateFormat.getDateTimeInstance()
                    .format(photo.getDate());
            addField(doc, "uploadTime", uploaded, Field.Index.TOKENIZED);
        }
        return doc;
    }

    /** Adds a stored field to {@code doc} unless {@code value} is null. */
    private static void addField(Document doc, String name, String value,
            Field.Index index) {
        if (value != null) {
            doc.add(new Field(name, value, Field.Store.YES, index));
        }
    }
}

文件SearcherFile.java是搜索磁盘索引文件内容的；
内容如下：
package com.upolestar.kmpm.service;

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

/**
 * Runs a multi-field query through a Lucene searcher and prints each hit.
 */
public class SearcherFile {
    /**
     * Builds a query from {@code q} over the title, description, tag and
     * userName fields, executes it, and prints the hit count followed by one
     * line per hit with its stored fields joined by {@code ===}.
     *
     * @param searcher searcher to run the query on; not closed here
     * @param q query strings handed to {@code MultiFieldQueryParser.parse}
     *        together with the four field names (presumably one string per
     *        field, in the same order; confirm against the Lucene docs)
     * @throws IOException if reading the index fails
     * @throws ParseException if a query string cannot be parsed
     */
    public static void search(Searcher searcher, String[] q)
            throws IOException, ParseException {
        final String[] searchedFields = { "title", "description", "tag", "userName" };
        // Stored fields echoed for every hit, in print order.
        final String[] printedFields = { "photoId", "uploadTime", "title",
                "description", "tag", "userName" };

        Query parsed = MultiFieldQueryParser.parse(q, searchedFields,
                new StandardAnalyzer());
        Hits results = searcher.search(parsed);
        System.out.println("SearcherFile======" + results.length());

        int position = 0;
        while (position < results.length()) {
            Document hit = results.doc(position);
            StringBuilder line = new StringBuilder();
            for (int f = 0; f < printedFields.length; f++) {
                if (f > 0) {
                    line.append("===");
                }
                // A missing field is rendered as the text "null".
                line.append(hit.get(printedFields[f]));
            }
            System.out.println(line);
            position++;
        }
    }
}

文件Test.java是操作的主文件；
内容如下：
package com.upolestar.kmpm.test;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Date;

import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;

import com.upolestar.kmpm.po.Photo;
import com.upolestar.kmpm.service.IndexerFile;
import com.upolestar.kmpm.service.SearcherFile;

/**
 * Demo driver: rebuilds the Lucene index from the photo table, then runs a
 * sample search against that index and prints how long the search took.
 */
public class Test {
    /** Directory on disk where the Lucene index is written and read. */
    public final static String indexDir = "D:\\TestLucene";

    /**
     * Opens a JDBC connection to the local MySQL server.
     *
     * @return the connection, or {@code null} when the driver cannot be
     *         loaded or the connection attempt fails (the error is printed)
     */
    private static Connection getConnection() {
        Connection conn = null;
        // The photo table of this example is created in the "test" database.
        String url = "jdbc:mysql://localhost:3306/test";
        String userName = "root";
        String password = "1111";
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = java.sql.DriverManager
                    .getConnection(url, userName, password);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("Error Trace in getConnection() : "
                    + e.getMessage());
        }
        return conn;
    }

    /**
     * Builds the index, then opens a searcher on it and runs the sample
     * search. Search failures are printed; the searcher is always closed.
     */
    public static void main(String[] args) throws IOException, ParseException,
            SQLException {
        index(); // build the index
        Searcher searcher = null;
        try {
            searcher = new IndexSearcher(indexDir);
            search(searcher); // run the search
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                searcher.close();
            }
        }
    }

    /**
     * Runs the sample query through {@code SearcherFile.search} and prints
     * the elapsed wall-clock time in seconds.
     *
     * @param searcher an open searcher; not closed by this method
     */
    public static void search(Searcher searcher) throws IOException,
            ParseException {
        // Search keywords. The last two entries are the literal text "null",
        // not absent values.
        String[] q = { "SVN", "捱三", "null", "null" };
        long start = System.currentTimeMillis();
        SearcherFile.search(searcher, q);
        long end = System.currentTimeMillis();
        System.out.println("花费时间：" + (double) (end - start) / 1000 + "秒");
    }

    /**
     * Loads all photos from the database and writes them to the index
     * directory. Failures while loading or indexing are printed, not thrown.
     *
     * @throws SQLException if closing the connection fails
     */
    public static void index() throws SQLException {
        Connection conn = getConnection();
        if (conn == null) {
            // getConnection() has already reported why; without a connection
            // there is nothing to load, so avoid a NullPointerException.
            System.out.println("Skipping indexing: no database connection.");
            return;
        }
        try {
            Photo[] list = Photo.loadPhotos(conn);
            IndexerFile.indexFile(indexDir, list);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            conn.close();
        }
    }
}

已经测试过！！

分享到：

poi为什么会出现这个问题？ | 将maven2创建的web项目转为myEclipse项目

2008-12-14 21:53
浏览 2766
评论(0)
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论