根据文章内容自动抽取标签

javafan_303

浏览: 963351 次
性别:
来自: 北京

最近访客 | 更多访客>>

perfect_control

wuzijingaip

yuanfen2014

pcpig

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

	/**
	 * Extracts up to five keywords (tags) from the given text.
	 *
	 * <p>The text is tokenized with an {@code IKAnalyzer} and the occurrences of
	 * each retained term are counted. The text is then indexed as a single
	 * document in an in-memory {@code RAMDirectory}; every counted term is
	 * searched against that index and weighted as
	 * {@code maxScore * occurrenceCount}. The weighted terms are ordered by
	 * {@code getSortedHashtableByValue} and the first five are returned.
	 *
	 * <p>Ranking is best-effort: any exception raised while indexing or searching
	 * is printed and whatever keywords were collected so far (possibly none) are
	 * returned.
	 *
	 * @param text the article content to analyze
	 * @return at most five keywords; an empty list when {@code Common.empty(text)}
	 *         is true, when no term passes the length filter, or when ranking fails
	 * @throws IOException if tokenizing the text fails during the counting pass
	 */
	private List<String> getKeyWord(String text) throws IOException {
		final int maxKeywords = 5;
		List<String> keywords = new ArrayList<String>();
		if (Common.empty(text)) {
			return keywords;
		}

		Analyzer analyzer = new IKAnalyzer(true);

		// Pass 1: count how often each retained term occurs in the text.
		Map<String, Integer> words = new HashMap<String, Integer>();
		TokenStream tokenStream = analyzer.tokenStream("*", new StringReader(text));
		try {
			TermAttribute termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
			while (tokenStream.incrementToken()) {
				String word = termAtt.term();
				// Skip very short terms. NOTE(review): Common.strlen presumably measures
				// byte/display width rather than char count - confirm against Common.
				if (word.length() > 1 && Common.strlen(word) > 2) {
					Integer count = words.get(word);
					words.put(word, count == null ? 1 : count + 1);
				}
			}
		} finally {
			// Release the stream (and its underlying reader) even if tokenizing fails.
			tokenStream.close();
		}
		if (words.isEmpty()) {
			return keywords;
		}

		// Pass 2: index the text in memory and weight every term by its search score.
		Directory dir = null;
		IndexSearcher searcher = null;
		try {
			String fieldName = "text";
			dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, analyzer, true,
					IndexWriter.MaxFieldLength.LIMITED);
			try {
				Document doc = new Document();
				doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
				writer.addDocument(doc);
			} finally {
				// Close the writer even when addDocument fails so it is not leaked.
				writer.close();
			}

			searcher = new IndexSearcher(dir);
			searcher.setSimilarity(new IKSimilarity());
			Map<String, Float> temps = new HashMap<String, Float>();
			for (Entry<String, Integer> wordCount : words.entrySet()) {
				String key = wordCount.getKey();
				Query query = IKQueryParser.parse(fieldName, key);
				TopDocs topDocs = searcher.search(query, 1);
				if (topDocs.totalHits > 0) {
					temps.put(key, topDocs.getMaxScore() * wordCount.getValue());
				}
			}

			// NOTE(review): the helper presumably orders entries from highest to
			// lowest score - confirm against getSortedHashtableByValue.
			Entry<String, Float>[] keywordEntry = getSortedHashtableByValue(temps);
			for (Entry<String, Float> entry : keywordEntry) {
				if (keywords.size() >= maxKeywords) {
					break;
				}
				keywords.add(entry.getKey());
			}
		} catch (Exception e) {
			// Deliberately best-effort: a ranking failure must not break the caller.
			e.printStackTrace();
		} finally {
			// Either resource may still be null if its construction failed above;
			// guard so cleanup cannot throw a NullPointerException out of this method.
			if (searcher != null) {
				try {
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (dir != null) {
				try {
					dir.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return keywords;
	}

分享到：