moxy/social-binding/src/main/java/eclipselink/example/moxy/socialbinding/util/KeywordExtractor.java - eclipselink/examples - Git at Google

 /*******************************************************************************
  * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
  * This program and the accompanying materials are made available under the
  * terms of the Eclipse Public License v1.0 and Eclipse Distribution License v. 1.0
  * which accompanies this distribution.
  *
  * The Eclipse Public License is available at http://www.eclipse.org/legal/epl-v10.html
  * and the Eclipse Distribution License is available at
  * http://www.eclipse.org/org/documents/edl-v10.php.
  ******************************************************************************/
 package eclipselink.example.moxy.socialbinding.util;

 import java.io.BufferedReader;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.StringTokenizer;

 /**
  * Simple utility class to extract and prioritize a set of keywords based on the
  * title provided.
  *
  * @author rbarkhous
  * @since EclipseLink 2.4.2
  */
 public class KeywordExtractor {

     /**
      * Return the longest word in the title (preferably a capitalized word).
      */
     public static String extractKeywords(String postTitle) {
         ArrayList<String> allWords = new WordList();
         ArrayList<String> upperCaseWords = new WordList();

         ArrayList<String> excludeWords = buildExlucdeWordsList();

         StringTokenizer tokenizer = new StringTokenizer(postTitle, ",.!?():;-[]'\" \t\n\r\f/");
         while (tokenizer.hasMoreElements()) {
             String word = tokenizer.nextToken();
             if (!excludeWords.contains(word)) {
                 allWords.add(word);
                 if (Character.isUpperCase(word.toCharArray()[0])) {
                     upperCaseWords.add(word);
                 }
             }
         }

         StringLengthComparator comparator = new StringLengthComparator();
         Collections.sort(allWords, comparator);
         Collections.sort(upperCaseWords, comparator);

         if (upperCaseWords.size() > 1) {
             return upperCaseWords.get(0);
         } else {
             return allWords.get(0);
         }
    }

     private static ArrayList<String> buildExlucdeWordsList() {
         ArrayList<String> excludeWords = new WordList();

         try {
             ClassLoader cl = Thread.currentThread().getContextClassLoader();
             InputStream is = cl.getResourceAsStream("META-INF/exclude-words.txt");
             InputStreamReader isr = new InputStreamReader(is);
             BufferedReader br = new BufferedReader(isr);
             String line;
             while ((line = br.readLine()) != null) {
                 excludeWords.add(line);
             }
             br.close();
         } catch (Exception e) {
             e.printStackTrace();
         }

         return excludeWords;
     }

     private static class StringLengthComparator implements Comparator<String> {
         public int compare(String o1, String o2) {
             if (o1.length() > o2.length()) {
                 return -1;
             } else if (o1.length() < o2.length()) {
                 return 1;
             } else {
                 return 0;
             }
         }
     }

     private static class WordList extends ArrayList<String> {
         private static final long serialVersionUID = 4780991427891054829L;

         @Override
         public boolean contains(Object o) {
             String s = (String) o;
             for (String string : this) {
                 if (s.equalsIgnoreCase(string)) return true;
             }
             return false;
         }
     }

 }
	/*******************************************************************************
	* Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
	* This program and the accompanying materials are made available under the
	* terms of the Eclipse Public License v1.0 and Eclipse Distribution License v. 1.0
	* which accompanies this distribution.
	*
	* The Eclipse Public License is available at http://www.eclipse.org/legal/epl-v10.html
	* and the Eclipse Distribution License is available at
	* http://www.eclipse.org/org/documents/edl-v10.php.
	******************************************************************************/
	package eclipselink.example.moxy.socialbinding.util;

	import java.io.BufferedReader;
	import java.io.InputStream;
	import java.io.InputStreamReader;
	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.Comparator;
	import java.util.StringTokenizer;

	/**
	* Simple utility class to extract and prioritize a set of keywords based on the
	* title provided.
	*
	* @author rbarkhous
	* @since EclipseLink 2.4.2
	*/
	public class KeywordExtractor {

	/**
	* Return the longest word in the title (preferably a capitalized word).
	*/
	public static String extractKeywords(String postTitle) {
	ArrayList<String> allWords = new WordList();
	ArrayList<String> upperCaseWords = new WordList();

	ArrayList<String> excludeWords = buildExlucdeWordsList();

	StringTokenizer tokenizer = new StringTokenizer(postTitle, ",.!?():;-[]'\" \t\n\r\f/");
	while (tokenizer.hasMoreElements()) {
	String word = tokenizer.nextToken();
	if (!excludeWords.contains(word)) {
	allWords.add(word);
	if (Character.isUpperCase(word.toCharArray()[0])) {
	upperCaseWords.add(word);
	}
	}
	}

	StringLengthComparator comparator = new StringLengthComparator();
	Collections.sort(allWords, comparator);
	Collections.sort(upperCaseWords, comparator);

	if (upperCaseWords.size() > 1) {
	return upperCaseWords.get(0);
	} else {
	return allWords.get(0);
	}
	}

	private static ArrayList<String> buildExlucdeWordsList() {
	ArrayList<String> excludeWords = new WordList();

	try {
	ClassLoader cl = Thread.currentThread().getContextClassLoader();
	InputStream is = cl.getResourceAsStream("META-INF/exclude-words.txt");
	InputStreamReader isr = new InputStreamReader(is);
	BufferedReader br = new BufferedReader(isr);
	String line;
	while ((line = br.readLine()) != null) {
	excludeWords.add(line);
	}
	br.close();
	} catch (Exception e) {
	e.printStackTrace();
	}

	return excludeWords;
	}

	private static class StringLengthComparator implements Comparator<String> {
	public int compare(String o1, String o2) {
	if (o1.length() > o2.length()) {
	return -1;
	} else if (o1.length() < o2.length()) {
	return 1;
	} else {
	return 0;
	}
	}
	}

	private static class WordList extends ArrayList<String> {
	private static final long serialVersionUID = 4780991427891054829L;

	@Override
	public boolean contains(Object o) {
	String s = (String) o;
	for (String string : this) {
	if (s.equalsIgnoreCase(string)) return true;
	}
	return false;
	}
	}

	}