Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]
    1   package org.apache.lucene.analysis;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.IOException;
   21   import java.io.Reader;
   22   
   23   import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
   24   import org.apache.lucene.analysis.tokenattributes.TermAttribute;
   25   import org.apache.lucene.util.AttributeSource;
   26   
   27   /**
   28    * Emits the entire input as a single token.
   29    */
   30   public final class KeywordTokenizer extends Tokenizer {
   31     
   32     private static final int DEFAULT_BUFFER_SIZE = 256;
   33   
   34     private boolean done;
   35     private int finalOffset;
   36     private TermAttribute termAtt;
   37     private OffsetAttribute offsetAtt;
   38     
   39     public KeywordTokenizer(Reader input) {
   40       this(input, DEFAULT_BUFFER_SIZE);
   41     }
   42   
   43     public KeywordTokenizer(Reader input, int bufferSize) {
   44       super(input);
   45       init(bufferSize);
   46     }
   47   
   48     public KeywordTokenizer(AttributeSource source, Reader input, int bufferSize) {
   49       super(source, input);
   50       init(bufferSize);
   51     }
   52   
   53     public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) {
   54       super(factory, input);
   55       init(bufferSize);
   56     }
   57     
   58     private void init(int bufferSize) {
   59       this.done = false;
   60       termAtt = addAttribute(TermAttribute.class);
   61       offsetAtt = addAttribute(OffsetAttribute.class);
   62       termAtt.resizeTermBuffer(bufferSize);    
   63     }
   64     
   65     @Override
   66     public final boolean incrementToken() throws IOException {
   67       if (!done) {
   68         clearAttributes();
   69         done = true;
   70         int upto = 0;
   71         char[] buffer = termAtt.termBuffer();
   72         while (true) {
   73           final int length = input.read(buffer, upto, buffer.length-upto);
   74           if (length == -1) break;
   75           upto += length;
   76           if (upto == buffer.length)
   77             buffer = termAtt.resizeTermBuffer(1+buffer.length);
   78         }
   79         termAtt.setTermLength(upto);
   80         finalOffset = correctOffset(upto);
   81         offsetAtt.setOffset(correctOffset(0), finalOffset);
   82         return true;
   83       }
   84       return false;
   85     }
   86     
   87     @Override
   88     public final void end() {
   89       // set final offset 
   90       offsetAtt.setOffset(finalOffset, finalOffset);
   91     }
   92   
   93     @Override
   94     public void reset(Reader input) throws IOException {
   95       super.reset(input);
   96       this.done = false;
   97     }
   98   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]