Lucene

From Evgeny Goldin

Jump to: navigation, search

  • Recall measures how well the search system finds relevant documents.
  • Precision measures how well the system filters out the irrelevant documents.


Docs


IndexWriter

// Open an IndexWriter on the directory named by indexDir.
// In the Lucene 3.0 constructor the boolean argument is "create": true starts a new
// index (replacing an existing one); MaxFieldLength.UNLIMITED disables term truncation.
IndexWriter writer = new IndexWriter( FSDirectory.open( new File( indexDir )),
                                      new StandardAnalyzer( Version.LUCENE_30 ),
                                      true,
                                      IndexWriter.MaxFieldLength.UNLIMITED );
try
{
    File     f   = ...
    Document doc = new Document();

    // Reader-valued field: the file body is tokenized and indexed, not stored.
    // NOTE(review): FileReader decodes with the platform default charset - confirm
    // that matches the encoding of the files being indexed.
    doc.add( new Field( "contents", new FileReader( f )));
    // File name and full path are stored verbatim (retrievable from hits) and indexed
    // as single, un-analyzed terms.
    doc.add( new Field( "filename", f.getName(),          Field.Store.YES, Field.Index.NOT_ANALYZED ));
    doc.add( new Field( "fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED ));

    writer.addDocument( doc );
}
finally
{
    // Close the writer even when building or adding the document throws,
    // so it is never left open.
    writer.close();
}


IndexSearcher

// Open a searcher over the index stored in the directory named by indexDir.
IndexSearcher is     = new IndexSearcher( FSDirectory.open( new File( indexDir )));
try
{
    // Query terms without an explicit field are matched against "contents";
    // the query text is analyzed with StandardAnalyzer.
    QueryParser   parser = new QueryParser  ( Version.LUCENE_30,
                                              "contents",
                                              new StandardAnalyzer( Version.LUCENE_30 ));

    Query   query = parser.parse( "..." );
    // Ask for at most the 10 best-scoring hits.
    TopDocs hits  = is.search(query, 10);

    for( ScoreDoc scoreDoc : hits.scoreDocs )
    {
        // Load the stored fields of the hit and print its stored "fullpath" value.
        Document doc = is.doc( scoreDoc.doc );
        System.out.println( doc.get( "fullpath" ));
    }
}
finally
{
    // Close the searcher even when parsing, searching or document loading throws.
    is.close();
}


/**
 * Find the top 10 documents that contain the word "lucene" in a field named "contents",
 * sorted by descending relevance.
 */
// Build the query first so that nothing can fail between opening the searcher
// and entering the try block that guarantees it is closed.
Query         q        = new TermQuery( new Term( "contents", "lucene" ));
IndexSearcher searcher = new IndexSearcher( FSDirectory.open( new File( "/tmp/index" )));
try
{
    // hits.scoreDocs holds at most 10 entries.
    TopDocs   hits     = searcher.search(q, 10);
}
finally
{
    // Close the searcher even when the search throws.
    searcher.close();
}
Personal tools