博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
lucene对日期(date)和整型(int)的处理
阅读量:6973 次
发布时间:2019-06-27

本文共 8220 字,大约阅读时间需要 27 分钟。

项目结构:

运行效果:

==========================================

代码部分:

==========================================

/lucene_0400_dateAndInt/src/com/b510/lucene/util/LuceneUtil.java

/**
 * Lucene 3.5 demo: indexing and reading back int and date values.
 */
package com.b510.lucene.util;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Builds a small on-disk Lucene index from six hard-coded sample mails and
 * runs a term query against it. The attachment count is indexed as an int
 * {@link NumericField} and the mail date as a long {@link NumericField}
 * holding {@link Date#getTime()}.
 *
 * @author Hongten
 * @date 2013-1-31
 */
public class LuceneUtil {

    /**
     * Mail ids.
     */
    private String[] ids = { "1", "2", "3", "4", "5", "6" };
    /**
     * Mail addresses.
     */
    private String[] emails = { "aa@sina.com", "bb@foxmail.com", "cc@qq.com",
            "dd@163.com", "ee@gmail.com", "ff@sina.com" };
    /**
     * Mail contents.
     */
    private String[] contents = { "hello,aa,hi,hell world!!",
            "hello,bb,i'm a boy",
            "hello,cc",
            "hello,dd,welcome to my zone,this is a test hello",
            "hello,ee,haha,xixi,hello world!!",
            "hello,ff" };
    /**
     * Number of attachments per mail.
     */
    private int[] attachs = { 1, 5, 3, 2, 1, 6 };
    /**
     * Mail dates; populated by {@link #setDates()}.
     */
    private Date[] dates = null;
    /**
     * Recipient names.
     */
    private String[] names = { "hongten", "hanyuan", "Devide", "Tom", "Steven",
            "Shala" };

    private Directory directory = null;
    /**
     * Document boost keyed by the domain part of the mail address.
     */
    private Map<String, Float> scores = new HashMap<String, Float>();

    /**
     * Prepares the sample dates and boost table and opens the index directory.
     * An {@link IOException} while opening the directory is printed and leaves
     * {@code directory} as {@code null}.
     */
    public LuceneUtil() {
        try {
            setDates();
            scores.put("sina.com", 1.0f);
            scores.put("foxmail.com", 1.1f);
            directory = FSDirectory.open(new File(
                    "D:/WordPlace/lucene/lucene_0400_dateAndInt/lucene/index"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Fills {@code dates} with one date per mail, parsed from
     * {@code yyyy-MM-dd} literals. A {@link ParseException} is printed and
     * leaves the remaining entries {@code null}.
     */
    public void setDates() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        try {
            dates = new Date[ids.length];
            dates[0] = sdf.parse("2012-11-18");
            dates[1] = sdf.parse("2010-01-28");
            dates[2] = sdf.parse("2011-11-21");
            dates[3] = sdf.parse("2012-12-12");
            dates[4] = sdf.parse("2011-06-23");
            dates[5] = sdf.parse("2012-03-15");
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds the index: deletes all existing documents, then adds one
     * document per sample mail. I/O failures are printed, not rethrown.
     */
    public void index() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(
                    Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            // Remove every previously indexed document before re-adding.
            writer.deleteAll();
            Document document = null;
            for (int i = 0; i < ids.length; i++) {
                // Field.Store.YES: the original value is stored in the index and can be
                //   read back from a hit; suitable for keys and titles.
                // Field.Store.NO: the value is not stored; usually combined with
                //   Index.ANALYZED for large text that never needs to be restored.
                // ==============================
                // Field.Index.ANALYZED: tokenized and indexed; for titles, content, etc.
                // Field.Index.NOT_ANALYZED: indexed as a single token; for exact-match
                //   values such as id numbers, names and ids.
                // Field.Index.ANALYZED_NO_NORMS: tokenized, but no norms are stored.
                // Field.Index.NOT_ANALYZED_NO_NORMS: neither tokenized nor norms stored.
                // Field.Index.NO: not indexed at all.
                document = new Document();
                document.add(new Field("id", ids[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                document.add(new Field("content", contents[i], Field.Store.YES,
                        Field.Index.ANALYZED));
                document.add(new Field("name", names[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                // Numeric fields: stored and indexed; the date is kept as epoch millis.
                document.add(new NumericField("attach", Field.Store.YES, true)
                        .setIntValue(attachs[i]));
                document.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(dates[i].getTime()));

                // Boost the document by mail domain; unknown domains get 0.6.
                String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(et);
                Float boost = scores.get(et);
                if (boost != null) {
                    document.setBoost(boost);
                } else {
                    document.setBoost(0.6f);
                }
                writer.addDocument(document);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the {@code content} field for the term {@code hello} and prints
     * up to ten hits. The stored date (epoch millis) is printed as
     * {@code yyyy-MM-dd}. The searcher and reader are always closed.
     */
    public void search() {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(directory);
            searcher = new IndexSearcher(reader);
            TermQuery query = new TermQuery(new Term("content", "hello"));
            TopDocs tds = searcher.search(query, 10);
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println("文档序号:[" + sd.doc + "] 得分:[" + sd.score
                        + "] 邮件名称:[" + doc.get("email")
                        + "] 邮件人:[" + doc.get("name")
                        + "] 附件数:[" + doc.get("attach")
                        + "] 日期:[" + formatStoredDate(sdf, doc.get("date"))
                        + "] 内容 : [" + doc.get("content") + "]");
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the searcher and the reader separately, since both were opened here.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Converts the stored value of the {@code date} field back to a date string.
     *
     * @param sdf          formatter to apply
     * @param storedMillis stored field value, expected to be epoch milliseconds
     * @return the formatted date; {@code null} if the field is absent; the raw
     *         value unchanged if it is not a valid long
     */
    private String formatStoredDate(SimpleDateFormat sdf, String storedMillis) {
        if (storedMillis == null) {
            return null;
        }
        try {
            return sdf.format(new Date(Long.parseLong(storedMillis)));
        } catch (NumberFormatException e) {
            return storedMillis;
        }
    }
}

/lucene_0400_dateAndInt/src/com/b510/lucene/test/IndexTest.java

1 /** 2  *  3  */ 4 package com.b510.lucene.test; 5  6 import org.junit.Test; 7  8 import com.b510.lucene.util.LuceneUtil; 9 10 /**11  * @author Hongten 
12 * @date 2013-1-3113 */14 public class IndexTest {15 16 @Test17 public void testIndex(){18 LuceneUtil util = new LuceneUtil();19 util.index();20 }21 22 @Test23 public void testSearch(){24 LuceneUtil util = new LuceneUtil();25 util.search();26 }27 28 }

I'm Hongten

转载地址:http://uuesl.baihongyu.com/

你可能感兴趣的文章
HDU 2079 选课时间(题目已修改,注意读题)(简单的母函数运用)
查看>>
NYOJ260数数小木块
查看>>
云中漫步 - 1:申请 Azure 账号
查看>>
oralce health monitor
查看>>
SqlHelper
查看>>
前端画面-下拉后滚动
查看>>
golang使用http client发起get和post请求示例
查看>>
pathway 中几张特殊的通路图
查看>>
Java基础之深入理解Class对象与反射机制
查看>>
remoting生命周期
查看>>
javascript 复制功能 兼容所有浏览器的解决方案
查看>>
粒子滤波实现物体跟踪
查看>>
关于gcc、glibc和binutils模块之间的关系
查看>>
C#窗体内嵌外部程序(cmd.exe)的显示 转
查看>>
解决js跨域问题
查看>>
POJ 计算几何(2)
查看>>
【本科毕业设计论文】分布式网络爬虫的研究与实现
查看>>
12、浅谈MySQL主键
查看>>
C#多线程参数传递
查看>>
[zz]malloc()和calloc()
查看>>