Lucene创建索引

释放双眼,带上耳机,听听看~!

1.导入jar包(Lucene 4.9 的 core 与 analyzers-common、IKAnalyzer、Jericho HTML Parser、commons-io、JUnit)

2.创建实体Bean


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
1package com.zhishang.lucene;
2
/**
 * Plain data holder describing one HTML page: its title, its text content
 * and the location it came from.
 *
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBean {
    /** Title of the page. */
    private String title;
    /** Text content of the page. */
    private String content;
    /** Location of the page. */
    private String url;

    /** @return the stored title, or {@code null} if none has been set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the stored content, or {@code null} if none has been set */
    public String getContent() {
        return this.content;
    }

    /** @param content the content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the stored url, or {@code null} if none has been set */
    public String getUrl() {
        return this.url;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }
}
35

3.创建工具类(HtmlBeanUtil:把HTML文件解析为HtmlBean)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
1package com.zhishang.lucene;
2
3import net.htmlparser.jericho.Element;
4import net.htmlparser.jericho.HTMLElementName;
5import net.htmlparser.jericho.Source;
6import org.junit.Test;
7
8import java.io.File;
9import java.io.IOException;
10
11/**
12 * Created by Administrator on 2017/7/8.
13 */
14public class HtmlBeanUtil {
15
16
17    public static HtmlBean parseHtml(File file){
18        try {
19            Source sc = new Source(file);
20            Element element = sc.getFirstElement(HTMLElementName.TITLE);
21            if (element == null || element.getTextExtractor() == null){
22                return null;
23            }
24
25            HtmlBean htmlBean = new HtmlBean();
26            htmlBean.setTitle(element.getTextExtractor().toString());
27            htmlBean.setContent(sc.getTextExtractor().toString());
28            htmlBean.setUrl(file.getAbsolutePath());
29
30            return htmlBean;
31        } catch (IOException e) {
32            e.printStackTrace();
33        }
34
35        return null;
36    }
37}
38

4.创建索引操作类(CreateIndex)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
1package com.zhishang.lucene;
2
3import org.apache.commons.io.FileUtils;
4import org.apache.commons.io.filefilter.TrueFileFilter;
5import org.apache.lucene.analysis.Analyzer;
6import org.apache.lucene.analysis.standard.StandardAnalyzer;
7import org.apache.lucene.document.*;
8import org.apache.lucene.index.IndexWriter;
9import org.apache.lucene.index.IndexWriterConfig;
10import org.apache.lucene.store.Directory;
11import org.apache.lucene.store.FSDirectory;
12import org.apache.lucene.store.RAMDirectory;
13import org.apache.lucene.util.Version;
14import org.junit.Test;
15import org.wltea.analyzer.lucene.IKAnalyzer;
16
17import java.io.File;
18import java.io.IOException;
19import java.util.Collection;
20
21/**
22 * Created by Administrator on 2017/7/7.
23 */
24public class CreateIndex {
25    public static final String indexDir = "G:/index";
26    public static final String dataDir = "G:/data";
27
28    public void createIndex(){
29        try {
30            Directory dir = FSDirectory.open(new File(indexDir));
31            //分词器
32            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
33            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9,analyzer);
34            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
35            IndexWriter writer = new IndexWriter(dir,config);
36            File file = new File(dataDir);
37
38            RAMDirectory ramdir = new RAMDirectory();
39            Analyzer analyzer1 = new IKAnalyzer();
40            IndexWriterConfig config1 = new IndexWriterConfig(Version.LUCENE_4_9,analyzer1);
41            IndexWriter ramWriter = new IndexWriter(ramdir,config1);
42
43            Collection<File> files = FileUtils.listFiles(file, TrueFileFilter.INSTANCE,TrueFileFilter.INSTANCE);
44            int count = 0;
45            for(File f:files){
46                HtmlBean bean =  HtmlBeanUtil.parseHtml(f);
47                if(bean != null){
48                    Document document = new Document();
49                    document.add(new StringField("title",bean.getTitle(), Field.Store.YES));
50                    document.add(new TextField("content",bean.getContent(), Field.Store.YES));
51                    document.add(new StringField("url",bean.getUrl(), Field.Store.YES));
52                    ramWriter.addDocument(document);
53                    count++;
54                    if (count == 50){
55                        ramWriter.close();
56                        writer.addIndexes(ramdir);
57                        ramdir = new RAMDirectory();
58                        Analyzer analyzer2 = new IKAnalyzer();
59                        IndexWriterConfig config2 = new IndexWriterConfig(Version.LUCENE_4_9,analyzer2);
60                        ramWriter = new IndexWriter(ramdir,config2);
61                        count = 0;
62                    }
63
64                }
65            }
66            writer.close();
67        } catch (IOException e) {
68            e.printStackTrace();
69        }
70
71    }
72}
73

5.创建测试类(LuceneBean)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
1package com.zhishang.lucene;
2
3import org.apache.lucene.analysis.Analyzer;
4import org.apache.lucene.analysis.standard.StandardAnalyzer;
5import org.apache.lucene.index.IndexWriter;
6import org.apache.lucene.index.IndexWriterConfig;
7import org.apache.lucene.store.Directory;
8import org.apache.lucene.store.FSDirectory;
9import org.apache.lucene.util.Version;
10import org.junit.Test;
11
12import java.io.File;
13
14/**
15 * Created by Administrator on 2017/7/8.
16 */
17public class LuceneBean {
18
19    /*
20    创建索引
21     */
22    @Test
23    public void createIndex(){
24        File file = new File(CreateIndex.indexDir);
25        if (file.exists()){
26            file.delete();
27            file.mkdirs();
28        }
29        CreateIndex createIndex = new CreateIndex();
30        createIndex.createIndex();
31    }
32}
33

6.查看生成的索引文件

本文转自 素颜猪 51CTO博客,原文链接:http://blog.51cto.com/suyanzhu/1945466

给TA打赏
共{{data.count}}人
人已打赏
安全运维

OpenSSH-8.7p1离线升级修复安全漏洞

2021-10-23 10:13:25

安全运维

设计模式的设计原则

2021-12-12 17:36:11

个人中心
购物车
优惠劵
今日签到
有新私信 私信列表
搜索