Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]
    1   package org.apache.lucene.analysis;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import org.apache.lucene.util.AttributeSource;
   21   import org.apache.lucene.util.NumericUtils;
   22   import org.apache.lucene.document.NumericField; // for javadocs
   23   import org.apache.lucene.search.NumericRangeQuery; // for javadocs
   24   import org.apache.lucene.search.NumericRangeFilter; // for javadocs
   25   import org.apache.lucene.search.SortField; // for javadocs
   26   import org.apache.lucene.search.FieldCache; // javadocs
   27   import org.apache.lucene.analysis.tokenattributes.TermAttribute;
   28   import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
   29   import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
   30   
   31   /**
   32    * <b>Expert:</b> This class provides a {@link TokenStream}
   33    * for indexing numeric values that can be used by {@link
   34    * NumericRangeQuery} or {@link NumericRangeFilter}.
   35    *
   36    * <p>Note that for simple usage, {@link NumericField} is
   37    * recommended.  {@link NumericField} disables norms and
   38    * term freqs, as they are not usually needed during
   39    * searching.  If you need to change these settings, you
   40    * should use this class.
   41    *
   42    * <p>See {@link NumericField} for capabilities of fields
   43    * indexed numerically.</p>
   44    *
   45    * <p>Here's an example usage, for an <code>int</code> field:
   46    *
   47    * <pre>
   48    *  Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
   49    *  field.setOmitNorms(true);
   50    *  field.setOmitTermFreqAndPositions(true);
   51    *  document.add(field);
   52    * </pre>
   53    *
   54    * <p>For optimal performance, re-use the TokenStream and Field instance
   55    * for more than one document:
   56    *
   57    * <pre>
   58    *  NumericTokenStream stream = new NumericTokenStream(precisionStep);
   59    *  Field field = new Field(name, stream);
   60    *  field.setOmitNorms(true);
   61    *  field.setOmitTermFreqAndPositions(true);
   62    *  Document document = new Document();
   63    *  document.add(field);
   64    *
   65    *  for(all documents) {
   66    *    stream.setIntValue(value);
   67    *    writer.addDocument(document);
   68    *  }
   69    * </pre>
   70    *
   71    * <p>This stream is not intended to be used in analyzers;
   72    * it's more for iterating the different precisions during
   73    * indexing a specific numeric value.</p>
   74    *
   75    * <p><b>NOTE</b>: as token streams are only consumed once
   76    * the document is added to the index, if you index more
   77    * than one numeric field, use a separate <code>NumericTokenStream</code>
   78    * instance for each.</p>
   79    *
   80    * <p>See {@link NumericRangeQuery} for more details on the
   81    * <a
   82    * href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
   83    * parameter as well as how numeric fields work under the hood.</p>
   84    *
   85    * <p><font color="red"><b>NOTE:</b> This API is experimental and
   86    * might change in incompatible ways in the next release.</font>
   87    *
   88    * @since 2.9
   89    */
   90   public final class NumericTokenStream extends TokenStream {
   91   
   92     /** The full precision token gets this token type assigned. */
   93     public static final String TOKEN_TYPE_FULL_PREC  = "fullPrecNumeric";
   94   
   95     /** The lower precision tokens gets this token type assigned. */
   96     public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
   97   
   98     /**
   99      * Creates a token stream for numeric values using the default <code>precisionStep</code>
  100      * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized,
  101      * before using set a value using the various set<em>???</em>Value() methods.
  102      */
  103     public NumericTokenStream() {
  104       this(NumericUtils.PRECISION_STEP_DEFAULT);
  105     }
  106     
  107     /**
  108      * Creates a token stream for numeric values with the specified
  109      * <code>precisionStep</code>. The stream is not yet initialized,
  110      * before using set a value using the various set<em>???</em>Value() methods.
  111      */
  112     public NumericTokenStream(final int precisionStep) {
  113       super();
  114       this.precisionStep = precisionStep;
  115       if (precisionStep < 1)
  116         throw new IllegalArgumentException("precisionStep must be >=1");
  117     }
  118   
  119     /**
  120      * Expert: Creates a token stream for numeric values with the specified
  121      * <code>precisionStep</code> using the given {@link AttributeSource}.
  122      * The stream is not yet initialized,
  123      * before using set a value using the various set<em>???</em>Value() methods.
  124      */
  125     public NumericTokenStream(AttributeSource source, final int precisionStep) {
  126       super(source);
  127       this.precisionStep = precisionStep;
  128       if (precisionStep < 1)
  129         throw new IllegalArgumentException("precisionStep must be >=1");
  130     }
  131   
  132     /**
  133      * Expert: Creates a token stream for numeric values with the specified
  134      * <code>precisionStep</code> using the given
  135      * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
  136      * The stream is not yet initialized,
  137      * before using set a value using the various set<em>???</em>Value() methods.
  138      */
  139     public NumericTokenStream(AttributeFactory factory, final int precisionStep) {
  140       super(factory);
  141       this.precisionStep = precisionStep;
  142       if (precisionStep < 1)
  143         throw new IllegalArgumentException("precisionStep must be >=1");
  144     }
  145   
  146     /**
  147      * Initializes the token stream with the supplied <code>long</code> value.
  148      * @param value the value, for which this TokenStream should enumerate tokens.
  149      * @return this instance, because of this you can use it the following way:
  150      * <code>new Field(name, new NumericTokenStream(precisionStep).setLongValue(value))</code>
  151      */
  152     public NumericTokenStream setLongValue(final long value) {
  153       this.value = value;
  154       valSize = 64;
  155       shift = 0;
  156       return this;
  157     }
  158     
  159     /**
  160      * Initializes the token stream with the supplied <code>int</code> value.
  161      * @param value the value, for which this TokenStream should enumerate tokens.
  162      * @return this instance, because of this you can use it the following way:
  163      * <code>new Field(name, new NumericTokenStream(precisionStep).setIntValue(value))</code>
  164      */
  165     public NumericTokenStream setIntValue(final int value) {
  166       this.value = (long) value;
  167       valSize = 32;
  168       shift = 0;
  169       return this;
  170     }
  171     
  172     /**
  173      * Initializes the token stream with the supplied <code>double</code> value.
  174      * @param value the value, for which this TokenStream should enumerate tokens.
  175      * @return this instance, because of this you can use it the following way:
  176      * <code>new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value))</code>
  177      */
  178     public NumericTokenStream setDoubleValue(final double value) {
  179       this.value = NumericUtils.doubleToSortableLong(value);
  180       valSize = 64;
  181       shift = 0;
  182       return this;
  183     }
  184     
  185     /**
  186      * Initializes the token stream with the supplied <code>float</code> value.
  187      * @param value the value, for which this TokenStream should enumerate tokens.
  188      * @return this instance, because of this you can use it the following way:
  189      * <code>new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value))</code>
  190      */
  191     public NumericTokenStream setFloatValue(final float value) {
  192       this.value = (long) NumericUtils.floatToSortableInt(value);
  193       valSize = 32;
  194       shift = 0;
  195       return this;
  196     }
  197     
  198     @Override
  199     public void reset() {
  200       if (valSize == 0)
  201         throw new IllegalStateException("call set???Value() before usage");
  202       shift = 0;
  203     }
  204   
  205     @Override
  206     public boolean incrementToken() {
  207       if (valSize == 0)
  208         throw new IllegalStateException("call set???Value() before usage");
  209       if (shift >= valSize)
  210         return false;
  211   
  212       clearAttributes();
  213       final char[] buffer;
  214       switch (valSize) {
  215         case 64:
  216           buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
  217           termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
  218           break;
  219         
  220         case 32:
  221           buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_INT);
  222           termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
  223           break;
  224         
  225         default:
  226           // should not happen
  227           throw new IllegalArgumentException("valSize must be 32 or 64");
  228       }
  229       
  230       typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
  231       posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
  232       shift += precisionStep;
  233       return true;
  234     }
  235     
  236     @Override
  237     public String toString() {
  238       final StringBuilder sb = new StringBuilder("(numeric,valSize=").append(valSize);
  239       sb.append(",precisionStep=").append(precisionStep).append(')');
  240       return sb.toString();
  241     }
  242   
  243     // members
  244     private final TermAttribute termAtt = addAttribute(TermAttribute.class);
  245     private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
  246     private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  247     
  248     private int shift = 0, valSize = 0; // valSize==0 means not initialized
  249     private final int precisionStep;
  250     
  251     private long value = 0L;
  252   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]