释放双眼,戴上耳机,听听看~!
Lucene搜索方式大合集
1、IndexQueryTest类:Lucene搜索测试类
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650 1package junit;
2
3import java.io.File;
4import java.io.IOException;
5import java.text.ParseException;
6import java.util.ArrayList;
7import java.util.List;
8
9import org.apache.commons.lang.math.NumberUtils;
10import org.apache.lucene.document.Document;
11import org.apache.lucene.document.Field;
12import org.apache.lucene.document.NumericField;
13import org.apache.lucene.index.IndexReader;
14import org.apache.lucene.index.Term;
15import org.apache.lucene.queryParser.MultiFieldQueryParser;
16import org.apache.lucene.search.BooleanClause;
17import org.apache.lucene.search.BooleanQuery;
18import org.apache.lucene.search.Filter;
19import org.apache.lucene.search.FuzzyQuery;
20import org.apache.lucene.search.IndexSearcher;
21import org.apache.lucene.search.MultiPhraseQuery;
22import org.apache.lucene.search.NumericRangeFilter;
23import org.apache.lucene.search.PhraseQuery;
24import org.apache.lucene.search.PrefixQuery;
25import org.apache.lucene.search.Query;
26import org.apache.lucene.search.ScoreDoc;
27import org.apache.lucene.search.TermQuery;
28import org.apache.lucene.search.TermRangeQuery;
29import org.apache.lucene.search.TopDocs;
30import org.apache.lucene.search.WildcardQuery;
31import org.apache.lucene.search.highlight.Formatter;
32import org.apache.lucene.search.highlight.Fragmenter;
33import org.apache.lucene.search.highlight.Highlighter;
34import org.apache.lucene.search.highlight.QueryScorer;
35import org.apache.lucene.search.highlight.SimpleFragmenter;
36import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
37import org.apache.lucene.search.regex.RegexQuery;
38import org.apache.lucene.store.FSDirectory;
39import org.apache.lucene.util.Version;
40import org.junit.Test;
41import org.wltea.analyzer.lucene.IKAnalyzer;
42
43import com.ljq.entity.Person;
44import com.ljq.utils.Consts;
45import com.ljq.utils.DateUtils;
46import com.ljq.utils.LuceneUtil;
47import com.ljq.utils.XMLPropertyConfig;
48
49/**
50 * Lucene搜索方式大合集<br/><br/>
51 *
52 * Lucene搜索种类很多。这里就介绍几个最常用的。其中TermQuery\BooleanQuery\RegexQuery功能最强大,最为常用。
53 *
54 * @author 林计钦
55 * @version 1.0 2013-6-7 上午09:34:08
56 */
57public class IndexQueryTest {
58
59 /**
60 * 词条搜索(单个关键字查找)<br/><br/>
61 *
62 * 主要对象是TermQuery,调用方式如下:<br/>
63 * Term term=new Term(字段名, 搜索关键字);<br/>
64 * Query query=new TermQuery(term);<br/>
65 * Hits hits=searcher.search(query);<br/>
66 * @throws Exception
67 */
68 @Test
69 public void termQuery() throws Exception {
70 IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
71 IndexSearcher searcher = new IndexSearcher(reader);
72
73 //Term term=new Term("ids", "1");
74 //Term term=new Term("ages", "20");
75 //Term term=new Term("birthdays", "2008-06-12");
76 //Term term=new Term("name", "张三");
77 Term term=new Term("city", "厦门");
78
79 Query query=new TermQuery(term);
80 TopDocs topDocs=searcher.search(query, 1000);
81 System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
82 System.out.println();
83
84 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
85 for (ScoreDoc scDoc : scoreDocs) {
86 Document document = searcher.doc(scDoc.doc);
87 String id = document.get("id");
88 String name = document.get("name");
89 String age = document.get("age");
90 String city = document.get("city");
91 String birthday = document.get("birthday");
92 float score = scDoc.score; //相似度
93
94 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
95 id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
96 }
97
98 searcher.close();
99 reader.close();
100 }
101
102 /**
103 * 组合搜索(允许多个关键字组合搜索)<br/><br/>
104 *
105 * 主要对象是BooleanQuery,调用方式如下:<br/>
106 * Term term1=new Term(字段名, 搜索关键字);<br/>
107 * TermQuery query1=new TermQuery(term1);<br/><br/>
108 *
109 * Term term2=new Term(字段名, 搜索关键字);<br/>
110 * TermQuery query2=new TermQuery(term2);<br/><br/>
111 *
112 * BooleanQuery booleanQuery=new BooleanQuery();<br/>
113 * booleanQuery.add(query1, 参数);<br/>
114 * booleanQuery.add(query2, 参数);<br/><br/>
115 *
116 * Hits hits=searcher.search(booleanquery);<br/>
117 * 此方法中的核心在BooleanQuery的add方法上,其第二个参数有三个可选值,对应着逻辑上的与或非关系。<br/><br/>
118 *
119 * 参数如下:<br/>
120 * BooleanClause.Occur.MUST:必须包含,类似于逻辑运算的与<br/>
121 * BooleanClause.Occur.MUST_NOT:必须不包含,类似于逻辑运算的非<br/>
122 * BooleanClause.Occur.SHOULD:可以包含,类似于逻辑运算的或<br/>
123 * 这三者组合,妙用无穷。<br/>
124 * @throws Exception
125 */
126 @Test
127 public void booleanQuery() throws Exception {
128 IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
129 IndexSearcher searcher = new IndexSearcher(reader);
130
131 //组合条件:
132 //年龄(或):10、20、30、40
133 //名字(与): 四
134 //城市(非): 莆田
135 TermQuery ageQuery10=new TermQuery(new Term("ages", "10"));
136 TermQuery ageQuery20=new TermQuery(new Term("ages", "20"));
137 TermQuery ageQuery30=new TermQuery(new Term("ages", "30"));
138 TermQuery ageQuery40=new TermQuery(new Term("ages", "40"));
139
140 TermQuery nameQuery=new TermQuery(new Term("name", "四"));
141
142 TermQuery cityQuery=new TermQuery(new Term("city", "莆田"));
143
144 BooleanQuery booleanQuery=new BooleanQuery();
145 booleanQuery.add(ageQuery10, BooleanClause.Occur.SHOULD);
146 booleanQuery.add(ageQuery20, BooleanClause.Occur.SHOULD);
147 booleanQuery.add(ageQuery30, BooleanClause.Occur.SHOULD);
148 booleanQuery.add(ageQuery40, BooleanClause.Occur.SHOULD);
149 booleanQuery.add(nameQuery, BooleanClause.Occur.MUST);
150 booleanQuery.add(cityQuery, BooleanClause.Occur.MUST_NOT);
151
152 TopDocs topDocs=searcher.search(booleanQuery, 1000);
153 System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
154 System.out.println();
155
156 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
157 for (ScoreDoc scDoc : scoreDocs) {
158 Document document = searcher.doc(scDoc.doc);
159 String id = document.get("id");
160 String name = document.get("name");
161 String age = document.get("age");
162 String city = document.get("city");
163 String birthday = document.get("birthday");
164 float score = scDoc.score; //相似度
165
166 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
167 id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
168 }
169
170 searcher.close();
171 reader.close();
172 }
173
174 /**
175 * 范围搜索(允许搜索指定范围内的关键字结果)<br/><br/>
176 *
177 * 主要对象是TermRangeQuery,调用方式如下:<br/>
178 * TermRangeQuery rangequery=new TermRangeQuery(字段名, 起始值, 终止值, 起始值是否包含边界, 终止值是否包含边界); <br/><br/>
179 *
180 * Hits hits=searcher.search(rangequery);<br/>
181 * 此方法中的参数是Boolean类型的,表示是否包含边界 。<br/>
182 * true 包含边界<br/>
183 * false不包含边界<br/>
184 * @throws Exception
185 */
186 @Test
187 public void rangeQuery() throws Exception {
188 IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
189 IndexSearcher searcher = new IndexSearcher(reader);
190
191 TermRangeQuery idQuery=new TermRangeQuery("ids", "1", "3", true, true);
192 TermRangeQuery ageQuery=new TermRangeQuery("ages", "10", "30", true, true);
193 TermRangeQuery timeQuery=new TermRangeQuery("birthdays", "2011-03-09", "2013-01-07", true, true);
194
195 TopDocs topDocs=searcher.search(timeQuery, 1000);
196 System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
197 System.out.println();
198
199 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
200 for (ScoreDoc scDoc : scoreDocs) {
201 Document document = searcher.doc(scDoc.doc);
202 String id = document.get("id");
203 String name = document.get("name");
204 String age = document.get("age");
205 String city = document.get("city");
206 String birthday = document.get("birthday");
207 float score = scDoc.score; //相似度
208
209 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
210 id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
211 }
212
213 searcher.close();
214 reader.close();
215 }
216
217 /**
218 * 前缀搜索(搜索起始位置符合要求的结果)<br/><br/>
219 *
220 * 主要对象是PrefixQuery,调用方式如下:<br/>
221 * Term term=new Term(字段名, 搜索关键字);<br/>
222 * PrefixQuery prefixquery=new PrefixQuery(term);<br/>
223 * Hits hits=searcher.search(prefixquery);<br/>
224 *
225 * @throws Exception
226 */
227 @Test
228 public void prefixQuery() throws Exception {
229 IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
230 IndexSearcher searcher = new IndexSearcher(reader);
231
232 Term term=new Term("name", "王");
233 PrefixQuery prefixquery=new PrefixQuery(term);
234
235 TopDocs topDocs=searcher.search(prefixquery, 1000);
236 System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
237 System.out.println();
238
239 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
240 for (ScoreDoc scDoc : scoreDocs) {
241 Document document = searcher.doc(scDoc.doc);
242 String id = document.get("id");
243 String name = document.get("name");
244 String age = document.get("age");
245 String city = document.get("city");
246 String birthday = document.get("birthday");
247 float score = scDoc.score; //相似度
248
249 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
250 id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
251 }
252
253 searcher.close();
254 reader.close();
255 }
256
257 /**
258 * 短语搜索(根据零碎的短语组合成新的词组进行搜索)<br/><br/>
259 *
260 * 主要对象是PhraseQuery,调用方式如下:<br/>
261 * Term term1=new Term(字段名, 搜索关键字);<br/>
262 * Term term2=new Term(字段名, 搜索关键字);<br/><br/>
263 *
264 * PhraseQuery phrasequery=new PhraseQuery();<br/>
265 * phrasequery.setSlop(参数);<br/>
266 * phrasequery.add(term1);<br/>
267 * phrasequery.add(term2);<br/>
268 * Hits hits=searcher.search(phrasequery);<br/>
269 * 其中setSlop的参数是设置两个关键字之间允许间隔的最大值。<br/>
270 * @throws Exception
271 */
272 @Test
273 public void phraseQuery() throws Exception {
274 IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
275 IndexSearcher searcher = new IndexSearcher(reader);
276
277 Term term1=new Term("name", "林");
278 Term term2=new Term("name", "钦");
279
280 PhraseQuery phrasequery=new PhraseQuery();
281 phrasequery.setSlop(100);
282 phrasequery.add(term1);
283 phrasequery.add(term2);
284
285 TopDocs topDocs=searcher.search(phrasequery, 1000);
286 System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
287 System.out.println();
288
289 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
290 for (ScoreDoc scDoc : scoreDocs) {
291 Document document = searcher.doc(scDoc.doc);
292 String id = document.get("id");
293 String name = document.get("name");
294 String age = document.get("age");
295 String city = document.get("city");
296 String birthday = document.get("birthday");
297 float score = scDoc.score; //相似度
298
299 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
300 id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
301 }
302
303 searcher.close();
304 reader.close();
305 }
306
307 /**
308 * 多短语搜索(先指定一个前缀关键字,然后其他的关键字加在此关键字之后,组成词语进行搜索)<br/><br/>
309 *
310 * 主要对象是MultiPhraseQuery,调用方式如下:<br/>
311 *
312 * Term term=new Term(字段名,前置关键字);<br/>
313 * Term term1=new Term(字段名,搜索关键字);<br/>
314 * Term term2=new Term(字段名,搜索关键字);<br/><br/>
315 *
316 * MultiPhraseQuery multiPhraseQuery=new MultiPhraseQuery();<br/><br/>
317 *
318 * multiPhraseQuery.add(term);<br/>
319 * multiPhraseQuery.add(new Term[]{term1, term2});<br/><br/>
320 *
321 * Hits hits=searcher.search(multiPhraseQuery);<br/>
322 * @throws Exception
323 */
324 @Test
325 public void multiPhraseQuery() throws Exception {
326 IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
327 IndexSearcher searcher = new IndexSearcher(reader);
328
329 //查询“计张”、“计钦”组合的关键词,先指定一个前缀关键字,然后其他的关键字加在此关键字之后,组成词语进行搜索
330 Term term=new Term("name", "计"); //前置关键字
331 Term term1=new Term("name", "张"); //搜索关键字
332 Term term2=new Term("name", "钦"); //搜索关键字
333
334 MultiPhraseQuery multiPhraseQuery=new MultiPhraseQuery();
335 multiPhraseQuery.add(term);
336 multiPhraseQuery.add(new Term[]{term1, term2});
337
338
339 TopDocs topDocs=searcher.search(multiPhraseQuery, 1000);
340 System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
341 System.out.println();
342
343 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
344 for (ScoreDoc scDoc : scoreDocs) {
345 Document document = searcher.doc(scDoc.doc);
346 String id = document.get("id");
347 String name = document.get("name");
348 String age = document.get("age");
349 String city = document.get("city");
350 String birthday = document.get("birthday");
351 float score = scDoc.score; //相似度
352
353 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
354 id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
355 }
356
357 searcher.close();
358 reader.close();
359 }
360
361 /**
362 * 模糊搜索(顾名思义)<br/><br/>
363 *
364 * 主要对象是FuzzyQuery,调用方式如下:<br/><br/>
365 *
366 * Term term=new Term(字段名, 搜索关键字);<br/>
367 * FuzzyQuery fuzzyquery=new FuzzyQuery(term,参数);<br/>
368 * Hits hits=searcher.search(fuzzyquery);<br/>
369 * 此中的参数是表示模糊度,是小于1的浮点小数,比如0.5f
370 * @throws Exception
371 */
372 @Test
373 public void fuzzyQuery() throws Exception {
374 IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
375 IndexSearcher searcher = new IndexSearcher(reader);
376
377 Term term=new Term("name", "三张");
378 FuzzyQuery fuzzyquery=new FuzzyQuery(term, 0.5f);
379
380 TopDocs topDocs=searcher.search(fuzzyquery, 1000);
381 System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
382 System.out.println();
383
384 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
385 for (ScoreDoc scDoc : scoreDocs) {
386 Document document = searcher.doc(scDoc.doc);
387 String id = document.get("id");
388 String name = document.get("name");
389 String age = document.get("age");
390 String city = document.get("city");
391 String birthday = document.get("birthday");
392 float score = scDoc.score; //相似度
393
394 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
395 id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
396 }
397
398 searcher.close();
399 reader.close();
400 }
401
402 /**
403 * 通配符搜索(顾名思义)<br/><br/>
404 *
405 * 主要对象是:WildcardQuery,调用方式如下:<br/><br/>
406 *
407 * Term term=new Term(字段名,搜索关键字+通配符);<br/>
408 * WildcardQuery wildcardquery=new WildcardQuery(term);<br/>
409 * Hits hits=searcher.search(wildcardquery);<br/><br/>
410 *
411 * 其中的通配符分两种,即*和?<br/>
412 * * 表示任意多的自负<br/>
413 * ?表示任意一个字符
414 * @throws Exception
415 */
416 @Test
417 public void wildcardQuery() throws Exception {
418 IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
419 IndexSearcher searcher = new IndexSearcher(reader);
420
421 Term term=new Term("name", "三?");
422 WildcardQuery wildcardQuery=new WildcardQuery(term);
423
424 TopDocs topDocs=searcher.search(wildcardQuery, 1000);
425 System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
426 System.out.println();
427
428 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
429 for (ScoreDoc scDoc : scoreDocs) {
430 Document document = searcher.doc(scDoc.doc);
431 String id = document.get("id");
432 String name = document.get("name");
433 String age = document.get("age");
434 String city = document.get("city");
435 String birthday = document.get("birthday");
436 float score = scDoc.score; //相似度
437
438 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
439 id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
440 }
441
442 searcher.close();
443 reader.close();
444 }
445
446 /**
447 * 正则表达式搜索(顾名思义,这个类引入lucene-queries-3.5.0.jar包)<br/><br/>
448 *
449 * 主要对象是:RegexQuery,调用方式如下 <br/>
450 * String regex = ".*"; <br/>
451 * Term term = new Term (search_field_name, regex); <br/>
452 * RegexQuery query = new RegexQuery (term); <br/>
453 * TopDocs hits = searcher.search (query, 100); <br/>
454 * @throws Exception
455 */
456 @Test
457 public void regexQuery() throws Exception {
458 IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
459 IndexSearcher searcher = new IndexSearcher(reader);
460
461 String regex = "林*";
462 Term term=new Term("name", regex);
463 RegexQuery query = new RegexQuery(term);
464
465 TopDocs topDocs=searcher.search(query, 1000);
466 System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
467 System.out.println();
468
469 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
470 for (ScoreDoc scDoc : scoreDocs) {
471 Document document = searcher.doc(scDoc.doc);
472 String id = document.get("id");
473 String name = document.get("name");
474 String age = document.get("age");
475 String city = document.get("city");
476 String birthday = document.get("birthday");
477 float score = scDoc.score; //相似度
478
479 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
480 id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
481 }
482
483 searcher.close();
484 reader.close();
485 }
486
487 /**
488 * 数值范围过滤器,如:int、long、float类型等
489 *
490 * @throws Exception
491 */
492 @Test
493 public void numericFilter() throws Exception{ //CustomScoreQuery
494 //Filter filter = NumericRangeFilter.newLongRange("id", 1l, 3l, true, true);
495 Filter filter = NumericRangeFilter.newIntRange("age", 1, 39, true, true);
496 List<Person> persons=search(filter, new String[]{"name","city"}, "厦门");
497 for(Person person : persons){
498 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s.",
499 person.getId(), person.getName(), person.getAge(), person.getCity(), DateUtils.dateToString(person.getBirthday(), Consts.FORMAT_SHORT)));
500 }
501 }
502
503 /**
504 * 时间范围过滤器
505 * @throws Exception
506 */
507 @Test
508 public void dateFilter() throws Exception{
509 //2008-06-12
510 long min=DateUtils.stringToDate("2008-06-12", Consts.FORMAT_SHORT).getTime();
511 //2013-01-07
512 long max=DateUtils.stringToDate("2013-01-07", Consts.FORMAT_SHORT).getTime();
513 Filter filter = NumericRangeFilter.newLongRange("birthday", min, max, true, true);
514 List<Person> persons=search(filter, new String[]{"name","city"}, "厦门");
515 for(Person person : persons){
516 System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s.",
517 person.getId(), person.getName(), person.getAge(), person.getCity(), DateUtils.dateToString(person.getBirthday(), Consts.FORMAT_SHORT)));
518 }
519 }
520
521 /**
522 * 创建索引
523 *
524 * @throws Exception
525 */
526 @Test
527 public void createIndex() throws Exception {
528 List<Document> docs = new ArrayList<Document>();
529 for (Person person : getPersons()) {
530 Document doc = new Document();
531 //声明为NumericField的字段,只能用NumericRangeFilter对象范围查询,不能用作关键字查询。
532 //NumericField不推荐,统一用Field
533 doc.add(new NumericField("id", Field.Store.YES, true).setLongValue(person.getId()));
534 doc.add(new NumericField("age", Field.Store.YES, true).setIntValue(person.getAge()));
535 doc.add(new NumericField("birthday", Field.Store.YES, true).setLongValue(person.getBirthday().getTime()));
536
537 doc.add(new Field("ids", person.getId()+"", Field.Store.YES, Field.Index.NOT_ANALYZED));
538 doc.add(new Field("ages", person.getAge()+"", Field.Store.YES, Field.Index.NOT_ANALYZED));
539 doc.add(new Field("birthdays", DateUtils.dateToString(person.getBirthday(), Consts.FORMAT_SHORT),
540 Field.Store.YES, Field.Index.NOT_ANALYZED));
541 doc.add(new Field("name", person.getName(), Field.Store.YES, Field.Index.ANALYZED));
542 doc.add(new Field("city", person.getCity(), Field.Store.YES, Field.Index.ANALYZED));
543
544 docs.add(doc);
545 }
546 LuceneUtil.createIndex(docs);
547 }
548
549 private List<Person> search(Filter filter, String[] fields, String keyword) {
550 List<Person> result = new ArrayList<Person>();
551
552 IndexSearcher indexSearcher = null;
553 TopDocs topDocs = null;
554 try {
555 // 创建索引搜索器,且只读
556 IndexReader indexReader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
557 indexSearcher = new IndexSearcher(indexReader);
558
559 MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35,
560 fields, new IKAnalyzer());
561 Query query = queryParser.parse(keyword);
562
563 // 返回前number条记录
564 if(filter == null){
565 topDocs=indexSearcher.search(query, 100000);
566 }else {
567 topDocs=indexSearcher.search(query, filter, 100000);
568 }
569
570 // 信息展示
571 int totalCount = topDocs.totalHits;
572 System.out.println("共检索出 " + totalCount + " 条记录");
573
574 //高亮显示
575 Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
576 QueryScorer fragmentScorer = new QueryScorer(query);
577 Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
578 Fragmenter fragmenter = new SimpleFragmenter(100);
579 highlighter.setTextFragmenter(fragmenter);
580
581 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
582
583 for (ScoreDoc scDoc : scoreDocs) {
584 Document document = indexSearcher.doc(scDoc.doc);
585 String id = document.get("id");
586 String name = document.get("name");
587 String age = document.get("age");
588 String city = document.get("city");
589 String birthday = document.get("birthday");
590 float score = scDoc.score; //相似度
591 System.out.println("相似度:"+score);
592
593 String lighterName = highlighter.getBestFragment(new IKAnalyzer(), "name", name);
594 if (null == lighterName) {
595 lighterName = name;
596 }
597
598 String lighterAge = highlighter.getBestFragment(new IKAnalyzer(), "age", age);
599 if (null == lighterAge) {
600 lighterAge = age;
601 }
602
603 String lighterCity= highlighter.getBestFragment(new IKAnalyzer(), "city", city);
604 if (null == lighterCity) {
605 lighterCity = city;
606 }
607
608 String lighterBirthday = highlighter.getBestFragment(new IKAnalyzer(), "birthday", birthday);
609 if (null == lighterBirthday) {
610 lighterBirthday = birthday;
611 }
612
613 Person person = new Person();
614 person.setId(Long.parseLong(id));
615 person.setName(lighterName);
616 person.setAge(NumberUtils.toInt(age));
617 person.setCity(lighterCity);
618 person.setBirthday(DateUtils.longToDate(Long.parseLong(lighterBirthday)));
619 result.add(person);
620 }
621 } catch (Exception e) {
622 e.printStackTrace();
623 } finally {
624 try {
625 indexSearcher.close();
626 } catch (IOException e) {
627 e.printStackTrace();
628 }
629 }
630
631 return result;
632 }
633
634 private List<Person> getPersons() {
635 try {
636 List<Person> persons = new ArrayList<Person>();
637 persons.add(new Person(1l, "张三", 10, "福州", DateUtils.stringToDate("2013-01-07", Consts.FORMAT_SHORT)));
638 persons.add(new Person(2l, "张四", 20, "莆田", DateUtils.stringToDate("2012-01-08", Consts.FORMAT_SHORT)));
639 persons.add(new Person(3l, "王五", 30, "泉州", DateUtils.stringToDate("2011-03-09", Consts.FORMAT_SHORT)));
640 persons.add(new Person(4l, "李四", 40, "厦门", DateUtils.stringToDate("2010-04-10", Consts.FORMAT_SHORT)));
641 persons.add(new Person(5l, "李白", 50, "漳州", DateUtils.stringToDate("2009-05-11", Consts.FORMAT_SHORT)));
642 persons.add(new Person(6l, "林计张三张三张三张三张三张三张三张三张三张三张三张三张三计钦", 60, "龙岩", DateUtils.stringToDate("2008-06-12", Consts.FORMAT_SHORT)));
643 return persons;
644 } catch (ParseException e) {
645 e.printStackTrace();
646 }
647 return null;
648 }
649}
650
2、LuceneUtil类:Lucene增删改工具类
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198 1package com.ljq.utils;
2
3import java.io.File;
4import java.util.ArrayList;
5import java.util.List;
6
7import org.apache.log4j.Logger;
8import org.apache.lucene.document.Document;
9import org.apache.lucene.index.IndexWriter;
10import org.apache.lucene.index.IndexWriterConfig;
11import org.apache.lucene.index.Term;
12import org.apache.lucene.index.IndexWriterConfig.OpenMode;
13import org.apache.lucene.store.Directory;
14import org.apache.lucene.store.FSDirectory;
15import org.apache.lucene.util.Version;
16import org.wltea.analyzer.lucene.IKAnalyzer;
17
18
19/**
20 * lucene工具类,采用IKAnalyzer中文分词器
21 *
22 * @author 林计钦
23 * @version 1.0 2013-6-3 下午03:51:29
24 */
25public class LuceneUtil {
26 /** 索引库路径 */
27 private static final String indexPath = XMLPropertyConfig.getConfigXML()
28 .getString("index_path");
29 public static IndexWriter indexWriter = null;
30 private static final Logger log=Logger.getLogger(LuceneUtil.class);
31
32 public static IndexWriter getIndexWriter(){
33 if(indexWriter == null){
34 try {
35 //索引库路径不存在则新建一个
36 File indexFile=new File(indexPath);
37 if(!indexFile.exists()) indexFile.mkdir();
38
39 Directory fsDirectory = FSDirectory.open(indexFile);
40 IndexWriterConfig confIndex = new IndexWriterConfig(Version.LUCENE_35, new IKAnalyzer());
41 confIndex.setOpenMode(OpenMode.CREATE_OR_APPEND);
42 if (IndexWriter.isLocked(fsDirectory)) {
43 IndexWriter.unlock(fsDirectory);
44 }
45 indexWriter =new IndexWriter(fsDirectory, confIndex);
46 } catch (Exception e) {
47 e.printStackTrace();
48 }
49 }
50 return indexWriter;
51 }
52
53 /**
54 * 创建索引
55 *
56 * @param doc
57 * @throws Exception
58 */
59 public static boolean createIndex(Document doc) {
60 List<Document> docs = new ArrayList<Document>();
61 docs.add(doc);
62 return createIndex(docs);
63 }
64
65 /**
66 * 创建索引
67 *
68 * @param docs
69 * @throws Exception
70 */
71 public static boolean createIndex(List<Document> docs) {
72 try {
73 for (Document doc : docs) {
74 getIndexWriter().addDocument(doc);
75 }
76 // 优化操作
77 getIndexWriter().commit();
78 getIndexWriter().forceMerge(1); // forceMerge代替optimize
79 log.info("lucene create success.");
80 return true;
81 } catch (Exception e) {
82 log.error("lucene create failure.", e);
83 return false;
84 } finally {
85 if (getIndexWriter() != null) {
86 try {
87 getIndexWriter().close();
88 } catch (Exception e) {
89 e.printStackTrace();
90 }
91 }
92 }
93 }
94
95 /**
96 * 更新索引
97 *
98 * 例如:Term term = new Term("id","1234567");
99 * 先去索引文件里查找id为1234567的Document,如果有就更新它(如果有多条,最后更新后只有一条),如果没有就新增。
100 * 数据库更新的时候,我们可以只针对某个列来更新,而lucene只能针对一行数据更新。
101 *
102 * @param field Document的Field(类似数据库的字段)
103 * @param value Field中的一个关键词
104 * @param doc
105 * @return
106 */
107 public static boolean updateIndex(String field, String value, Document doc) {
108 try {
109 getIndexWriter().updateDocument(new Term(field, value), doc);
110
111 log.info("lucene update success.");
112 return true;
113 } catch (Exception e) {
114 log.error("lucene update failure.", e);
115 return false;
116 }finally{
117 if(getIndexWriter()!=null){
118 try {
119 getIndexWriter().close();
120 } catch (Exception e) {
121 e.printStackTrace();
122 }
123 }
124 }
125 }
126
127 /**
128 * 删除索引
129 *
130 * @param field Document的Field(类似数据库的字段)
131 * @param value Field中的一个关键词
132 * @param doc
133 * @return
134 */
135 public static boolean deleteIndex(String field, String value) {
136 try {
137 getIndexWriter().deleteDocuments(new Term(field, value));
138
139 log.info("lucene delete success.");
140 return true;
141 } catch (Exception e) {
142 log.error("lucene delete failure.", e);
143 return false;
144 }finally{
145 if(getIndexWriter()!=null){
146 try {
147 getIndexWriter().close();
148 } catch (Exception e) {
149 e.printStackTrace();
150 }
151 }
152 }
153 }
154
155 /**
156 * 删除整个索引库
157 *
158 * @return
159 */
160 public static boolean deleteAllIndex() {
161 try {
162 getIndexWriter().deleteAll();
163 log.info("lucene delete all success.");
164 return true;
165 } catch (Exception e) {
166 log.error("lucene delete all failure.", e);
167 return false;
168 }finally{
169 if(getIndexWriter()!=null){
170 try {
171 getIndexWriter().close();
172 } catch (Exception e) {
173 e.printStackTrace();
174 }
175 }
176 }
177 }
178
179
180 /**
181 * 判断索引库是否已创建
182 *
183 * @return true:存在,false:不存在
184 * @throws Exception
185 */
186 public static boolean existsIndex() throws Exception {
187 File file = new File(indexPath);
188 if (!file.exists()) {
189 file.mkdirs();
190 }
191 String indexSufix = "/segments.gen";
192 // 根据索引文件segments.gen是否存在判断是否是第一次创建索引
193 File indexFile = new File(indexPath + indexSufix);
194 return indexFile.exists();
195 }
196
197}
198