码迷,mamicode.com
首页 > 其他好文 > 详细

Lucene 中文分词器 和 删除 和修改词库

时间:2019-05-22 09:17:01      阅读:125      评论:0      收藏:0      [点我收藏+]

标签:conf   图片   document   open   director   red   for   apache   对象   

导入jar包

 

技术图片

 

package com.bw.lucene;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demonstrates basic Lucene index operations (add, update, delete, search)
 * using the IK Chinese analyzer in smart mode.
 *
 * <p>Every method opens the file-system index located at {@link #path} and
 * releases the directory, analyzer, reader and writer it opened before
 * returning, including when an exception is thrown.
 */
public class WriteDocument {
    /** Location of the index directory on disk. */
    static String path = "E://lucene";

    /**
     * Runs the update example and then prints the index contents.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened, written or read
     */
    public static void main(String[] args) throws Exception {
        // Other operations available for experimenting:
        // writeDoc(), deleteDocById(), deleteDoc(), queryParser().
        update();
        queryAll();
    }

    /**
     * Parses a fixed query string against the "content" field and prints up
     * to ten matching articles.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public static void queryParser() throws Exception {
        try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                DirectoryReader reader = DirectoryReader.open(directory);
                Analyzer analyzer = new IKAnalyzer(true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("content", analyzer);
            Query query = parser.parse("大数据");
            printHits(searcher, searcher.search(query, 10));
        }
    }

    /**
     * Adds one sample article to the index.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void writeDoc() throws Exception {
        // IKAnalyzer(true): Chinese analyzer with smart mode enabled.
        try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                Analyzer analyzer = new IKAnalyzer(true);
                IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            Artical art = new Artical();
            art.setAuthor("海员");
            art.setContent("大数据那家强,北京找北网 学不会报销往返路费 学会后在交费");
            art.setId(129L);
            art.setTitle("招生简章");
            art.setUrl("www.txstory.com");
            writer.addDocument(art.toDocument());
            // Closing the writer (end of the try block) commits the pending document.
        }
    }

    /**
     * Replaces indexed articles with a new sample article.
     *
     * <p>NOTE: {@code updateDocument} deletes every document containing the
     * given term before adding the new one, so all documents whose "author"
     * term is "海员" are replaced, not only a document with a matching id.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void update() throws Exception {
        // IKAnalyzer(true): Chinese analyzer with smart mode enabled.
        try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                Analyzer analyzer = new IKAnalyzer(true);
                IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            Artical artical = new Artical();
            artical.setAuthor("海员");
            artical.setContent("大学生班开班了");
            artical.setId(130L);
            artical.setTitle("招生简章");
            artical.setUrl("www.txstory.com");

            writer.updateDocument(new Term("author", "海员"), artical.toDocument());
            writer.commit();
        }
    }

    /**
     * Deletes the document whose "id" point value is exactly 129.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void deleteDocById() throws Exception {
        // IKAnalyzer(true): Chinese analyzer with smart mode enabled.
        try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                Analyzer analyzer = new IKAnalyzer(true);
                IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            // To delete an inclusive id range instead, use e.g.
            // LongPoint.newRangeQuery("id", 126, 128).
            Query query = LongPoint.newExactQuery("id", 129L);
            writer.deleteDocuments(query);
            writer.commit();
        }
    }

    /**
     * Deletes every document matched by parsing "招生" against the "title"
     * field. Deletion goes through the writer, not a reader.
     *
     * @throws Exception if the index cannot be opened or written, or the
     *                   query cannot be parsed
     */
    public static void deleteDoc() throws Exception {
        // IKAnalyzer(true): Chinese analyzer with smart mode enabled.
        try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                Analyzer analyzer = new IKAnalyzer(true);
                IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            QueryParser parser = new QueryParser("title", analyzer);
            Query query = parser.parse("招生");
            writer.deleteDocuments(query);
            writer.commit();
        }
    }

    /**
     * Prints up to ten documents from the index using a match-all query.
     *
     * @throws Exception if the index cannot be opened or read
     */
    public static void queryAll() throws Exception {
        try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new MatchAllDocsQuery();
            printHits(searcher, searcher.search(query, 10));
        }
    }

    /** Loads each hit's stored document and prints it as an article. */
    private static void printHits(IndexSearcher searcher, TopDocs hits) throws Exception {
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(Artical.toAritical(doc));
        }
    }
}

 

Lucene 中文分词器 和 删除 和修改词库

标签:conf   图片   document   open   director   red   for   apache   对象   

原文地址:https://www.cnblogs.com/JBLi/p/10903714.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!