core/org.eclipse.smila.solr/lib/source/org/apache/lucene/analysis/core/LowerCaseTokenizer.java - smila/org.eclipse.smila.core - Git at Google

 package org.apache.lucene.analysis.core;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.Reader;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.Version;

 /**
  * A tokenizer that does the work of {@link LetterTokenizer} and
  * {@code LowerCaseFilter} in one pass: text is divided at non-letters and the
  * resulting tokens are converted to lower case.
  *
  * <p>The result is functionally equivalent to chaining the two components;
  * this (redundant) implementation exists because there is a performance
  * advantage to doing both tasks at once.
  *
  * <p>Note: this does a decent job for most European languages, but does a
  * terrible job for some Asian languages, where words are not separated by
  * spaces.
  *
  * <p><a name="version"></a>
  * You may specify the required {@link Version} compatibility when creating
  * {@link LowerCaseTokenizer}:
  *
  * <ul>
  *   <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize
  *       and detect token characters. See
  *       {@link CharTokenizer#isTokenChar(int)} and
  *       {@link CharTokenizer#normalize(int)} for details.</li>
  * </ul>
  */
 public final class LowerCaseTokenizer extends LetterTokenizer {

   /**
    * Creates a LowerCaseTokenizer that reads from the given input.
    *
    * @param in
    *          the input to split up into tokens
    */
   public LowerCaseTokenizer(Reader in) {
     super(in);
   }

   /**
    * Creates a LowerCaseTokenizer, forwarding the version argument to the
    * superclass constructor.
    *
    * @param matchVersion
    *          the {@link Version} passed through to the superclass
    * @param in
    *          the input to split up into tokens
    * @deprecated Use {@link #LowerCaseTokenizer(Reader)}
    */
   @Deprecated
   public LowerCaseTokenizer(Version matchVersion, Reader in) {
     super(matchVersion, in);
   }

   /**
    * Creates a LowerCaseTokenizer that uses the supplied
    * {@link org.apache.lucene.util.AttributeFactory}.
    *
    * @param factory
    *          the attribute factory to use for this {@link Tokenizer}
    * @param in
    *          the input to split up into tokens
    */
   public LowerCaseTokenizer(AttributeFactory factory, Reader in) {
     super(factory, in);
   }

   /**
    * Creates a LowerCaseTokenizer with the supplied attribute factory,
    * forwarding the version argument to the superclass constructor.
    *
    * @param matchVersion
    *          the {@link Version} passed through to the superclass
    * @param factory
    *          the attribute factory to use for this {@link Tokenizer}
    * @param in
    *          the input to split up into tokens
    * @deprecated Use {@link #LowerCaseTokenizer(AttributeFactory,Reader)}
    */
   @Deprecated
   public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
     super(matchVersion, factory, in);
   }

   /**
    * Lower-cases a single character, given as an int, by delegating to
    * {@link Character#toLowerCase(int)}.
    *
    * @param c
    *          the character to normalize
    * @return the value returned by {@link Character#toLowerCase(int)} for
    *         {@code c}
    */
   @Override
   protected int normalize(int c) {
     final int lowered = Character.toLowerCase(c);
     return lowered;
   }
 }
	package org.apache.lucene.analysis.core;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.Reader;

	import org.apache.lucene.analysis.Tokenizer;
	import org.apache.lucene.analysis.util.CharTokenizer;
	import org.apache.lucene.util.AttributeFactory;
	import org.apache.lucene.util.Version;

	/**
	 * A tokenizer that does the work of {@link LetterTokenizer} and
	 * {@code LowerCaseFilter} in one pass: text is divided at non-letters and the
	 * resulting tokens are converted to lower case.
	 *
	 * <p>The result is functionally equivalent to chaining the two components;
	 * this (redundant) implementation exists because there is a performance
	 * advantage to doing both tasks at once.
	 *
	 * <p>Note: this does a decent job for most European languages, but does a
	 * terrible job for some Asian languages, where words are not separated by
	 * spaces.
	 *
	 * <p><a name="version"></a>
	 * You may specify the required {@link Version} compatibility when creating
	 * {@link LowerCaseTokenizer}:
	 *
	 * <ul>
	 *   <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize
	 *       and detect token characters. See
	 *       {@link CharTokenizer#isTokenChar(int)} and
	 *       {@link CharTokenizer#normalize(int)} for details.</li>
	 * </ul>
	 */
	public final class LowerCaseTokenizer extends LetterTokenizer {

	  /**
	   * Creates a LowerCaseTokenizer that reads from the given input.
	   *
	   * @param in
	   *          the input to split up into tokens
	   */
	  public LowerCaseTokenizer(Reader in) {
	    super(in);
	  }

	  /**
	   * Creates a LowerCaseTokenizer, forwarding the version argument to the
	   * superclass constructor.
	   *
	   * @param matchVersion
	   *          the {@link Version} passed through to the superclass
	   * @param in
	   *          the input to split up into tokens
	   * @deprecated Use {@link #LowerCaseTokenizer(Reader)}
	   */
	  @Deprecated
	  public LowerCaseTokenizer(Version matchVersion, Reader in) {
	    super(matchVersion, in);
	  }

	  /**
	   * Creates a LowerCaseTokenizer that uses the supplied
	   * {@link org.apache.lucene.util.AttributeFactory}.
	   *
	   * @param factory
	   *          the attribute factory to use for this {@link Tokenizer}
	   * @param in
	   *          the input to split up into tokens
	   */
	  public LowerCaseTokenizer(AttributeFactory factory, Reader in) {
	    super(factory, in);
	  }

	  /**
	   * Creates a LowerCaseTokenizer with the supplied attribute factory,
	   * forwarding the version argument to the superclass constructor.
	   *
	   * @param matchVersion
	   *          the {@link Version} passed through to the superclass
	   * @param factory
	   *          the attribute factory to use for this {@link Tokenizer}
	   * @param in
	   *          the input to split up into tokens
	   * @deprecated Use {@link #LowerCaseTokenizer(AttributeFactory,Reader)}
	   */
	  @Deprecated
	  public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
	    super(matchVersion, factory, in);
	  }

	  /**
	   * Lower-cases a single character, given as an int, by delegating to
	   * {@link Character#toLowerCase(int)}.
	   *
	   * @param c
	   *          the character to normalize
	   * @return the value returned by {@link Character#toLowerCase(int)} for
	   *         {@code c}
	   */
	  @Override
	  protected int normalize(int c) {
	    final int lowered = Character.toLowerCase(c);
	    return lowered;
	  }
	}