social-binding: improved KeywordExtractor
diff --git a/moxy/social-binding/src/main/java/eclipselink/example/moxy/socialbinding/util/KeywordExtractor.java b/moxy/social-binding/src/main/java/eclipselink/example/moxy/socialbinding/util/KeywordExtractor.java
index a170cf2..f15df0d 100644
--- a/moxy/social-binding/src/main/java/eclipselink/example/moxy/socialbinding/util/KeywordExtractor.java
+++ b/moxy/social-binding/src/main/java/eclipselink/example/moxy/socialbinding/util/KeywordExtractor.java
@@ -10,6 +10,9 @@
  ******************************************************************************/
 package eclipselink.example.moxy.socialbinding.util;
 
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -24,29 +27,55 @@
  */
 public class KeywordExtractor {
 
+    /**
+     * Return the longest non-excluded word in the title, preferring capitalized words if there are any.
+     */
     public static String extractKeywords(String postTitle) {
-        StringTokenizer tokenizer = new StringTokenizer(postTitle, ",.!?()[]'\" \t\n\r\f/");
+        ArrayList<String> allWords = new WordList();
+        ArrayList<String> upperCaseWords = new WordList();
 
-        ArrayList<String> words = new ArrayList<String>();
+        ArrayList<String> excludeWords = buildExlucdeWordsList();
 
+        StringTokenizer tokenizer = new StringTokenizer(postTitle, ",.!?():;-[]'\" \t\n\r\f/");
         while (tokenizer.hasMoreElements()) {
-            String token = tokenizer.nextToken();
-            if (token.length() > 3 || token.toUpperCase().equals(token)) {
-                words.add(token);
+            String word = tokenizer.nextToken();
+            // NOTE(review): "'" is a delimiter above, so exclude entries such as "don't" can never match
+            if (!excludeWords.contains(word)) {
+                allWords.add(word);
+                if (Character.isUpperCase(word.charAt(0))) {
+                    upperCaseWords.add(word);
+                }
             }
         }
+        StringLengthComparator comparator = new StringLengthComparator(); // longest word first
+        Collections.sort(allWords, comparator);
+        Collections.sort(upperCaseWords, comparator);
 
-        // Sort words, longest one first
-        Collections.sort(words, new StringLengthComparator());
-
-        String keywords = null;
-        if (words.size() > 1) {
-            keywords = words.get(0) + "," + words.get(1);
+        if (!upperCaseWords.isEmpty()) { // a single capitalized word is enough to be preferred
+            return upperCaseWords.get(0);
         } else {
-            keywords = words.get(0);
+            return allWords.get(0); // throws IndexOutOfBoundsException when no word survived the filter
         }
+    }
+    
+    /** Read the words to ignore, one per line, from META-INF/exclude-words.txt (best effort). */
+    private static ArrayList<String> buildExlucdeWordsList() {
+        ArrayList<String> excludeWords = new WordList();
 
-        return keywords;
+        try {
+            ClassLoader cl = Thread.currentThread().getContextClassLoader();
+            InputStream is = cl.getResourceAsStream("META-INF/exclude-words.txt");
+            if (is == null) throw new IllegalStateException("META-INF/exclude-words.txt not found");
+            BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+            try {
+                for (String line = br.readLine(); line != null; line = br.readLine()) excludeWords.add(line);
+            } finally {
+                br.close(); // closes the underlying stream too, even when a read fails
+            }
+        } catch (Exception e) {
+            e.printStackTrace(); // keep going with the words read so far (possibly none)
+        }
+        return excludeWords;
     }
 
     private static class StringLengthComparator implements Comparator<String> {
@@ -60,5 +89,18 @@
             }
         }
     }
+    
+    private static class WordList extends ArrayList<String> { // list whose contains() ignores case
+        private static final long serialVersionUID = 4780991427891054829L;
 
+        @Override
+        public boolean contains(Object o) {
+            if (!(o instanceof String)) return false; // null or non-String: no match, no exception
+            for (String string : this) {
+                if (((String) o).equalsIgnoreCase(string)) return true;
+            }
+            return false;
+        }
+    }
+    
 }
\ No newline at end of file
diff --git a/moxy/social-binding/src/main/resources/META-INF/exclude-words.txt b/moxy/social-binding/src/main/resources/META-INF/exclude-words.txt
new file mode 100644
index 0000000..e2af3bf
--- /dev/null
+++ b/moxy/social-binding/src/main/resources/META-INF/exclude-words.txt
@@ -0,0 +1,184 @@
+a
+able
+about
+across
+after
+ain't
+all
+almost
+also
+am
+among
+an
+and
+any
+are
+aren't
+as
+at
+be
+because
+been
+but
+by
+can
+can't
+cannot
+could
+could've
+couldn't
+dear
+did
+didn't
+do
+does
+doesn't
+don't
+either
+else
+ever
+every
+for
+from
+get
+got
+had
+has
+hasn't
+have
+he
+he'd
+he'll
+he's
+her
+hers
+him
+his
+how
+how'd
+how'll
+how's
+however
+i
+i'd
+i'll
+i'm
+i've
+if
+in
+into
+is
+isn't
+it
+it's
+its
+just
+least
+let
+like
+likely
+may
+me
+might
+might've
+mightn't
+most
+must
+must've
+mustn't
+my
+neither
+no
+nor
+not
+of
+off
+often
+on
+only
+or
+other
+our
+own
+rather
+said
+say
+says
+shan't
+she
+she'd
+she'll
+she's
+should
+should've
+shouldn't
+since
+so
+some
+than
+that
+that'll
+that's
+the
+their
+them
+then
+there
+there's
+these
+they
+they'd
+they'll
+they're
+they've
+this
+tis
+to
+too
+twas
+us
+wants
+was
+wasn't
+we
+we'd
+we'll
+we're
+were
+weren't
+what
+what'd
+what's
+when
+when
+when'd
+when'll
+when's
+where
+where'd
+where'll
+where's
+which
+while
+who
+who'd
+who'll
+who's
+whom
+why
+why'd
+why'll
+why's
+will
+with
+won't
+would
+would've
+wouldn't
+yet
+you
+you'd
+you'll
+you're
+you've
+your
\ No newline at end of file