Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]
    1   package org.apache.lucene.analysis;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.IOException;
   21   
   22   import org.apache.lucene.analysis.tokenattributes.TermAttribute;
   23   
   24   /**
   25    * Removes words that are too long or too short from the stream.
   26    */
   27   public final class LengthFilter extends TokenFilter {
   28   
   29     final int min;
   30     final int max;
   31     
   32     private TermAttribute termAtt;
   33   
   34     /**
   35      * Build a filter that removes words that are too long or too
   36      * short from the text.
   37      */
   38     public LengthFilter(TokenStream in, int min, int max)
   39     {
   40       super(in);
   41       this.min = min;
   42       this.max = max;
   43       termAtt = addAttribute(TermAttribute.class);
   44     }
   45     
   46     /**
   47      * Returns the next input Token whose term() is the right len
   48      */
   49     @Override
   50     public final boolean incrementToken() throws IOException {
   51       // return the first non-stop word found
   52       while (input.incrementToken()) {
   53         int len = termAtt.termLength();
   54         if (len >= min && len <= max) {
   55             return true;
   56         }
   57         // note: else we ignore it but should we index each part of it?
   58       }
   59       // reached EOS -- return false
   60       return false;
   61     }
   62   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]