Lucene 创建索引

释放双眼，带上耳机，听听看~！

1.导入jar包（Lucene 4.9 核心包与分词包、IK Analyzer、Jericho HTML Parser、commons-io、JUnit）

2.创建实体Bean


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
1package com.zhishang.lucene;

2

/**
 * Plain data holder describing one HTML page: its title, its text content
 * and the location it was read from.
 *
 * <p>All three properties are {@code null} until set; the setters store the
 * given value as-is without validation.
 *
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBean {
    private String title;
    private String content;
    private String url;

    /** @param title the page title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @param content the page text to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @param url the page location to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the stored title, or {@code null} if none was set */
    public String getTitle() {
        return title;
    }

    /** @return the stored content, or {@code null} if none was set */
    public String getContent() {
        return content;
    }

    /** @return the stored location, or {@code null} if none was set */
    public String getUrl() {
        return url;
    }
}

35

3.创建工具Bean


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
1package com.zhishang.lucene;

2

3import net.htmlparser.jericho.Element;

4import net.htmlparser.jericho.HTMLElementName;

5import net.htmlparser.jericho.Source;

6import org.junit.Test;

7

8import java.io.File;

9import java.io.IOException;

10

11/**

12 * Created by Administrator on 2017/7/8.

13 */

14public class HtmlBeanUtil {

15

16

17    public static HtmlBean parseHtml(File file){

18        try {

19            Source sc = new Source(file);

20            Element element = sc.getFirstElement(HTMLElementName.TITLE);

21            if (element == null || element.getTextExtractor() == null){

22                return null;

23            }

24

25            HtmlBean htmlBean = new HtmlBean();

26            htmlBean.setTitle(element.getTextExtractor().toString());

27            htmlBean.setContent(sc.getTextExtractor().toString());

28            htmlBean.setUrl(file.getAbsolutePath());

29

30            return htmlBean;

31        } catch (IOException e) {

32            e.printStackTrace();

33        }

34

35        return null;

36    }

37}

38

4.创建操作Bean


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
1package com.zhishang.lucene;

2

3import org.apache.commons.io.FileUtils;

4import org.apache.commons.io.filefilter.TrueFileFilter;

5import org.apache.lucene.analysis.Analyzer;

6import org.apache.lucene.analysis.standard.StandardAnalyzer;

7import org.apache.lucene.document.*;

8import org.apache.lucene.index.IndexWriter;

9import org.apache.lucene.index.IndexWriterConfig;

10import org.apache.lucene.store.Directory;

11import org.apache.lucene.store.FSDirectory;

12import org.apache.lucene.store.RAMDirectory;

13import org.apache.lucene.util.Version;

14import org.junit.Test;

15import org.wltea.analyzer.lucene.IKAnalyzer;

16

17import java.io.File;

18import java.io.IOException;

19import java.util.Collection;

20

21/**

22 * Created by Administrator on 2017/7/7.

23 */

24public class CreateIndex {

25    public static final String indexDir = &quot;G:/index&quot;;

26    public static final String dataDir = &quot;G:/data&quot;;

27

28    public void createIndex(){

29        try {

30            Directory dir = FSDirectory.open(new File(indexDir));

31            //分词器

32            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);

33            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9,analyzer);

34            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

35            IndexWriter writer = new IndexWriter(dir,config);

36            File file = new File(dataDir);

37

38            RAMDirectory ramdir = new RAMDirectory();

39            Analyzer analyzer1 = new IKAnalyzer();

40            IndexWriterConfig config1 = new IndexWriterConfig(Version.LUCENE_4_9,analyzer1);

41            IndexWriter ramWriter = new IndexWriter(ramdir,config1);

42

43            Collection&lt;File&gt; files = FileUtils.listFiles(file, TrueFileFilter.INSTANCE,TrueFileFilter.INSTANCE);

44            int count = 0;

45            for(File f:files){

46                HtmlBean bean =  HtmlBeanUtil.parseHtml(f);

47                if(bean != null){

48                    Document document = new Document();

49                    document.add(new StringField(&quot;title&quot;,bean.getTitle(), Field.Store.YES));

50                    document.add(new TextField(&quot;content&quot;,bean.getContent(), Field.Store.YES));

51                    document.add(new StringField(&quot;url&quot;,bean.getUrl(), Field.Store.YES));

52                    ramWriter.addDocument(document);

53                    count++;

54                    if (count == 50){

55                        ramWriter.close();

56                        writer.addIndexes(ramdir);

57                        ramdir = new RAMDirectory();

58                        Analyzer analyzer2 = new IKAnalyzer();

59                        IndexWriterConfig config2 = new IndexWriterConfig(Version.LUCENE_4_9,analyzer2);

60                        ramWriter = new IndexWriter(ramdir,config2);

61                        count = 0;

62                    }

63

64                }

65            }

66            writer.close();

67        } catch (IOException e) {

68            e.printStackTrace();

69        }

70

71    }

72}

73

5.创建测试Bean


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
1package com.zhishang.lucene;

2

3import org.apache.lucene.analysis.Analyzer;

4import org.apache.lucene.analysis.standard.StandardAnalyzer;

5import org.apache.lucene.index.IndexWriter;

6import org.apache.lucene.index.IndexWriterConfig;

7import org.apache.lucene.store.Directory;

8import org.apache.lucene.store.FSDirectory;

9import org.apache.lucene.util.Version;

10import org.junit.Test;

11

12import java.io.File;

13

14/**

15 * Created by Administrator on 2017/7/8.

16 */

17public class LuceneBean {

18

19    /*

20    创建索引

21     */

22    @Test

23    public void createIndex(){

24        File file = new File(CreateIndex.indexDir);

25        if (file.exists()){

26            file.delete();

27            file.mkdirs();

28        }

29        CreateIndex createIndex = new CreateIndex();

30        createIndex.createIndex();

31    }

32}

33

6.查看生成的索引文件（位于 CreateIndex.indexDir 指定的目录，即 G:/index）

本文转自素颜猪 51CTO博客，原文链接:http://blog.51cto.com/suyanzhu/1945466

{{userData.name}}已认证

1.导入jar包

2.创建实体Bean

3.创建工具Bean

4.创建操作Bean

5.创建测试Bean

6.查看生成的索引文件

OpenSSH-8.7p1离线升级修复安全漏洞

设计模式的设计原则

{{userData.name}}已认证

1.导入jar包

2.创建实体Bean

3.创建工具Bean

4.创建操作Bean

5.创建测试Bean

6.查看生成的索引文件

Related posts:

OpenSSH-8.7p1离线升级修复安全漏洞

设计模式的设计原则

Lucene.Net 2.3.1开发介绍 —— 三、索引（五）

hadoop组件---面向列的开源数据库(八)--java使用phoenix查询hbase

Hbase常用优化、Hbae性能优化、Hbase优化经验总结

Elasticsearch性能优化实战指南