2012-07-24

Number and Date in Lucene 3.6.0

Lucene 早期只有一種文字欄位,當遇到數值類別的資料時,除了精確查詢外,不管是範圍查詢或者是排序都很難用,後來 Lucene 終於在 2.9.0 支援數值欄位,並且衍生支援日期欄位。

建立數值索引是用 NumericField 取代原本的 Field,查詢數值索引時改用 NumericRangeQuery。

另外,除了純數值欄位外,還有一種文數字混合的欄位,也就是字串裡有數值存在。這種欄位可以直接當作一般文字欄位來處理,但建立索引時必須使用會保留數字的分析器(例如 StandardAnalyzer 或 WhitespaceAnalyzer)。


public class RangeTestCase extends TestCase {

  private static final Version VERSION = Version.LUCENE_36;
  private static final String F_TITLE = "title";
  private static final String F_PRICE = "price";
  private static final String F_PUBLICATION_TIME = "pTime";
  private Directory directory = new RAMDirectory();
  private IndexWriter writer;
  private IndexReader reader;
  private IndexSearcher searcher;

  @Override
  protected void setUp() throws Exception {
    super.setUp();

    // create index
    this.writer = this.createWriter();
    // this.writer.setInfoStream(System.out);

    System.out.println("addDocument...");
    this.writer.addDocument(this.createDocument("Lucene in action", 44.95,
        new Date(104, 11, 1)));
    this.writer.addDocument(this.createDocument("Lucene in action, 2 Edition",
        49.99, new Date(110, 6, 28)));
    this.writer.addDocument(this.createDocument("Spring in action", 7.89,
        new Date(104, 11, 1)));
    this.writer.addDocument(this.createDocument("Spring in action, 2 Edition",
        11.44, new Date(107, 7, 23)));
    this.writer.addDocument(this.createDocument("Spring in action, 3 Edition",
        49.99, new Date(111, 5, 29)));

    System.out.println("commit...");
    // 資料太多時,可以分批 commit
    // commit後(沒有close)就可以使用 IndexReader
    this.writer.commit();
    // 沒有 close 或 commit,不能使用 IndexReader
    // this.closeWriter();
  }

  private Document createDocument(String title, double price, Date date) {
    Document doc = new Document();

    // 文字欄位用 Field
    doc.add(new Field(RangeTestCase.F_TITLE, title, Field.Store.YES,
        Field.Index.ANALYZED));

    // 數值欄位用 NumericField
    doc.add(new NumericField(RangeTestCase.F_PRICE, Field.Store.YES, true).setDoubleValue(price));

    // 日期欄位一樣是用 NumericField,只要轉成 long 值就可以了
    doc.add(new NumericField(RangeTestCase.F_PUBLICATION_TIME, Field.Store.YES,
        true).setLongValue(date.getTime()));

    return doc;
  }

  @Override
  protected void tearDown() throws Exception {
    super.tearDown();
    this.closeWriter();
    this.closeSearcher();
  }

  /**
   * 查詢文字中的數字
   */
  public void testTextWithNumberQuery() {
    System.out.println("testTextWithNumberQuery...");
    try {
      // 必須用 WhitespaceAnalyzer 或 StandardAnalyzer
      // 不可用會丟掉數字的 SimpleAnalyzer 或 StopAnalyzer
      IndexSearcher searcher = this.createSearcher();
      TopDocs hits = searcher.search(new TermQuery(new Term(
          RangeTestCase.F_TITLE, "3")), 100);
      this.showDocuments(searcher, hits);
      assertEquals(1, hits.totalHits);
    }
    catch (IOException e) {
      Assert.fail(e.getMessage());
    }
  }

  /**
   * 查詢數值欄位,包含起訖
   */
  public void testNumericRangeQueryInclusive() {
    System.out.println("testNumericRangeQueryInclusive...");
    try {
      // 查詢 double
      NumericRangeQuery<Double> q = NumericRangeQuery.newDoubleRange(
          RangeTestCase.F_PRICE, 7.89, 44.95, true, true);
      IndexSearcher searcher = this.createSearcher();
      TopDocs hits = searcher.search(q, 100);
      this.showDocuments(searcher, hits);
      assertEquals(3, hits.totalHits);
    }
    catch (IOException e) {
      Assert.fail(e.getMessage());
    }
  }

  /**
   * 查詢數值欄位,不包含起訖
   */
  public void testNumericRangeQueryExclusive() {
    System.out.println("testNumericRangeQueryExclusive...");
    try {
      // 查詢 double
      NumericRangeQuery<Double> q = NumericRangeQuery.newDoubleRange(
          RangeTestCase.F_PRICE, 7.89, 44.95, false, false);
      IndexSearcher searcher = this.createSearcher();
      TopDocs hits = searcher.search(q, 100);
      this.showDocuments(searcher, hits);
      assertEquals(1, hits.totalHits);
    }
    catch (IOException e) {
      Assert.fail(e.getMessage());
    }
  }

  /**
   * 查詢日期欄位,包含起訖
   */
  public void testDateRangeQuery() {
    System.out.println("testDateRangeQuery...");
    try {
      // 查詢 long 日期
      NumericRangeQuery<Long> q = NumericRangeQuery.newLongRange(
          RangeTestCase.F_PUBLICATION_TIME, new Date(107, 7, 23).getTime(),
          new Date(110, 6, 28).getTime(), true, true);
      IndexSearcher searcher = this.createSearcher();
      TopDocs hits = searcher.search(q, 100);
      this.showDocuments(searcher, hits);
      assertEquals(2, hits.totalHits);
    }
    catch (IOException e) {
      Assert.fail(e.getMessage());
    }
  }

  /**
   * 倒出文章
   */
  private void showDocuments(IndexSearcher searcher, TopDocs hits)
      throws CorruptIndexException, IOException {
    Document d;
    for (ScoreDoc sd : hits.scoreDocs) {
      d = searcher.doc(sd.doc);
      System.out.println(d.get(F_TITLE) + " - " + d.get(F_PRICE) + " - "
          + new Date(Long.parseLong(d.get(F_PUBLICATION_TIME))));
    }
  }

  private IndexWriter createWriter() throws CorruptIndexException,
      LockObtainFailedException, IOException {
    IndexWriterConfig config = new IndexWriterConfig(RangeTestCase.VERSION,
        new StandardAnalyzer(RangeTestCase.VERSION));
    config.setOpenMode(OpenMode.CREATE);
    return new IndexWriter(this.directory, config);
  }

  private IndexSearcher createSearcher() throws CorruptIndexException,
      IOException {
    return new IndexSearcher(this.createReader());
  }

  private IndexReader createReader() throws CorruptIndexException, IOException {
    return IndexReader.open(this.directory);
  }

  private void closeWriter() {
    if (this.writer != null) {
      try {
        this.writer.close();
      }
      catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  private void closeSearcher() {
    this.closeReader();
    if (this.searcher != null) {
      try {
        this.searcher.close();
      }
      catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  private void closeReader() {
    if (this.reader != null) {
      try {
        this.reader.close();
      }
      catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
}
---

沒有留言:

張貼留言