(一)索引的创建步骤:
学习Lucene,最重要的一点在于索引的建立,这是一切搜索等的基础,Lucene6创建索引的步骤如下:
(1)创建目录(Directory),(即多线程支持创建);
(2)词库分析器(Analyzer)的创建(要注意使用的是哪种Analyzer,创建的时候也要使用对应的索引器);
(3)IndexWriterConfig对象创建,获取IndexWriter对象,判断覆盖/追加索引;
(4)遍历待索引的对象列表,创建文档对象(Document),添加域(Field)等;
(5)通过IndexWriter将文档添加到索引中;
(6)结束索引创建过程,IndexWriter执行close()结束。
(二)代码示例:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
pom.xml的配置:我用的是Lucene 6.4.1,用其他版本也可以
2<?xml version="1.0" encoding="UTF-8"?>
3<project xmlns="http://maven.apache.org/POM/4.0.0"
4 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
6 <modelVersion>4.0.0</modelVersion>
7
8 <groupId>IDC</groupId>
9 <artifactId>luc</artifactId>
10 <version>1.0-SNAPSHOT</version>
11 <build>
12 <plugins>
13 <plugin>
14 <groupId>org.apache.maven.plugins</groupId>
15 <artifactId>maven-compiler-plugin</artifactId>
16 <configuration>
17 <source>1.7</source>
18 <target>1.7</target>
19 </configuration>
20 </plugin>
21 </plugins>
22 </build>
23
24 <properties>
25 <lucene.version>6.4.1</lucene.version>
26 </properties>
27 <dependencies>
28 <dependency>
29 <groupId>org.apache.lucene</groupId>
30 <artifactId>lucene-core</artifactId>
31 <version>${lucene.version}</version>
32 </dependency>
33
34 <dependency>
35 <groupId>org.apache.lucene</groupId>
36 <artifactId>lucene-highlighter</artifactId>
37 <version>${lucene.version}</version>
38 </dependency>
39 <dependency>
40 <groupId>com.google.zxing</groupId>
41 <artifactId>core</artifactId>
42 <version>3.2.0</version>
43 </dependency>
44 <dependency>
45 <groupId>com.chenlb.mmseg4j</groupId>
46 <artifactId>mmseg4j-analysis</artifactId>
47 <version>1.9.1</version>
48 </dependency>
49 <dependency>
50 <groupId>org.apache.lucene</groupId>
51 <artifactId>lucene-core</artifactId>
52 <version>${lucene.version}</version>
53 </dependency>
54
55 <dependency>
56 <groupId>org.apache.lucene</groupId>
57 <artifactId>lucene-queryparser</artifactId>
58 <version>${lucene.version}</version>
59 </dependency>
60 <dependency>
61 <groupId>org.apache.lucene</groupId>
62 <artifactId>lucene-analyzers-common</artifactId>
63 <version>${lucene.version}</version>
64 </dependency>
65 <dependency>
66 <groupId>org.apache.lucene</groupId>
67 <artifactId>lucene-analyzers-smartcn</artifactId>
68 <version>${lucene.version}</version>
69 </dependency>
70 <dependency>
71 <groupId>org.apache.lucene</groupId>
72 <artifactId>lucene-memory</artifactId>
73 <version>${lucene.version}</version>
74 </dependency>
75 <dependency>
76 <groupId>org.apache.lucene</groupId>
77 <artifactId>lucene-queries</artifactId>
78 <version>${lucene.version}</version>
79 </dependency>
80 <dependency>
81 <groupId>org.apache.lucene</groupId>
82 <artifactId>lucene-demo</artifactId>
83 <version>${lucene.version}</version>
84 </dependency>
85 <dependency>
86 <groupId>junit</groupId>
87 <artifactId>junit</artifactId>
88 <version>4.12</version>
89 </dependency>
90 <dependency>
91 <groupId>org.junit.jupiter</groupId>
92 <artifactId>junit-jupiter-api</artifactId>
93 <version>RELEASE</version>
94 </dependency>
95
96 </dependencies>
97 <!-- lucene end -->
98</project>
99
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
**Java(IDEA)代码:**
2package com.Licene6;
3
4import org.apache.lucene.analysis.standard.StandardAnalyzer;
5import org.apache.lucene.document.Document;
6import org.apache.lucene.document.Field;
7import org.apache.lucene.document.TextField;
8import org.apache.lucene.index.IndexWriter;
9import org.apache.lucene.index.IndexWriterConfig;
10import org.apache.lucene.store.Directory;
11import org.apache.lucene.store.FSDirectory;
12
13import java.io.File;
14import java.io.FileFilter;
15import java.io.FileReader;
16import java.io.IOException;
17import java.nio.file.Paths;
18
19/***Created by Mo
20 *On 2017/8/18 ***13:39.
21 ******/
22public class Index {
23 private IndexWriter writer;//写入索引的类
24 //FileFilter的实现类,用来过滤符合条件的文档。
25 private static class TextFilesFilter implements FileFilter {
26 @Override//重构
27 public boolean accept(File pathname) {
28 return pathname.getName().toLowerCase().endsWith(".txt");
29 }
30 }
31 //构造方法,用来传入索引存放路径
32 public Index(String indexdirectory) throws IOException {
33 Directory directory = FSDirectory.open(Paths.get(indexdirectory));//打开目录
34 //索引
35 IndexWriterConfig config=new IndexWriterConfig(new StandardAnalyzer());
36 config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
37 writer=new IndexWriter(directory,config);
38 }
39 //关闭indexWriter,不要忘记了
40 public void close() throws IOException{
41 writer.close();
42 }
43 //遍历文件夹下所有文件,选择符合条件文件,写入索引的方法
44 public int index(String dataDir,FileFilter filter) throws IOException{
45 File[] files=new File(dataDir).listFiles();
46 for(File file:files){
47 if(!file.isDirectory() && !file.isHidden()
48 && file.exists()
49 && file.canRead()
50 && (filter==null) || filter.accept(file)){
51 indexFile(file);
52 }
53 }
54 return writer.numDocs();//返回写入的文档总数
55 }
56 //写入索引的方法,将生成的Document(目录)对象写入到索引中
57 private void indexFile(File file) throws IOException{
58 System.out.println("indexing..."+file.getCanonicalPath());
59 Document doc=getDocument(file);
60 writer.addDocument(doc);
61 }
62 //生成Document对象的方法,Document对象就是对文档各个属性的封装
63 protected Document getDocument(File file) throws IOException{
64 Document doc=new Document();
65 doc.add(new Field("contents",new FileReader(file), TextField.TYPE_NOT_STORED));//分析但不存储
66 doc.add(new Field("filename",file.getName(),TextField.TYPE_STORED));//存储并分词
67 doc.add(new Field("fullpath",file.getCanonicalPath(),TextField.TYPE_STORED));//存储并分词
68 return doc;
69 }
70
71 public static void main(String[] args) throws IOException {
72 String indexDir="D:\\workspace\\lucene6.4.1\\learing2017.8\\0818\\index";//目录,里边可以没有内容
73 String dataDir="D:\\workspace\\lucene6.4.1\\learing2017.8\\0818\\data";//文件,里边要有.txt文件
74
75 long start=System.currentTimeMillis();//当前时间
76 Index index =new Index(indexDir);
77 int numberIndexed= index.index(dataDir, new TextFilesFilter());//写入索引
78 index.close();//关闭,这个是需要的
79 long end=System.currentTimeMillis();
80 System.out.println(numberIndexed);
81 System.out.println(end-start);//索引时间
82 }
83}
84
参考: