| /******************************************************************************* |
| * Copyright (c) 2005, 2006 Erkki Lindpere and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * Erkki Lindpere - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.ecf.internal.provider.phpbb; |
| |
| import java.net.URL; |
| import java.text.DateFormat; |
| import java.text.ParseException; |
| import java.text.SimpleDateFormat; |
| import java.util.ArrayList; |
| import java.util.Calendar; |
| import java.util.Date; |
| import java.util.HashMap; |
| import java.util.LinkedHashMap; |
| import java.util.List; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import org.eclipse.ecf.bulletinboard.BBException; |
| import org.eclipse.ecf.core.identity.ID; |
| import org.eclipse.ecf.core.identity.IDCreateException; |
| import org.eclipse.ecf.core.identity.Namespace; |
| import org.eclipse.ecf.internal.bulletinboard.commons.AbstractParser; |
| import org.eclipse.ecf.internal.bulletinboard.commons.IBBObjectFactory; |
| import org.eclipse.ecf.internal.bulletinboard.commons.parsing.DefaultPatternDescriptor; |
| import org.eclipse.ecf.internal.bulletinboard.commons.parsing.IPatternDescriptor; |
| import org.eclipse.ecf.internal.bulletinboard.commons.util.StringUtil; |
| import org.eclipse.ecf.internal.provider.phpbb.identity.ThreadMessageID; |
| |
| /** |
| * NB! use new String(Matcher.group(int)) instead of Matcher.group(int) |
| * |
| * @author Erkki |
| */ |
| public class PHPBBParser extends AbstractParser { |
| |
| public PHPBBParser(Namespace namespace, URL baseURL) { |
| super(namespace, baseURL); |
| } |
| |
| public static final Pattern PAT_PHPBB_SIGNATURE = Pattern.compile("<span class=\"copyright\">(.*)</span>", Pattern.DOTALL); |
| |
| public static final String PHPBB_SIGNATURE = "Powered by phpBB"; |
| |
| public boolean isServiceSupported(final CharSequence seq) { |
| final Matcher m = PAT_PHPBB_SIGNATURE.matcher(seq); |
| if (m.find()) { |
| String copyright = m.group(1); |
| copyright = StringUtil.stripHTMLTrim(copyright); |
| return copyright.contains(PHPBB_SIGNATURE); |
| } |
| return false; |
| } |
| |
| public static final Pattern PAT_FORUM_OR_CATEGORY = Pattern.compile("(?:" + "<span class=\"forumlink\"> <a href=\"viewforum.php\\?f=([0-9]+)(?:.*)\" class=\"forumlink\">(.*)</a><br />" + "(?:\\s*)</span> <span class=\"genmed\">(?s)(.*?)</span>" + ")|(?:" + "<a href=\"index.php\\?c=([0-9]+)(?:.*)\" class=\"cattitle\">(.*)</a>" + ")"); |
| |
| /** |
| * Parses forum HTML output into a list of forums. |
| * @param seq |
| * @return map of ID -> Forum associations. |
| */ |
| public Map<ID, Forum> parseForums(final CharSequence seq) { |
| final Map<ID, Forum> forums = new LinkedHashMap<ID, Forum>(); |
| final Matcher matcher = PAT_FORUM_OR_CATEGORY.matcher(seq); |
| Category lastCat = null; |
| while (matcher.find()) { |
| // Matched forum |
| if (matcher.group(2) != null) { |
| final String name = StringUtil.stripHTMLTrim(matcher.group(2)); |
| final String desc = StringUtil.stripHTMLTrim(matcher.group(3)); |
| if (StringUtil.notEmptyStr(name)) { |
| final ForumFactory ff = new ForumFactory(); |
| final String idStr = matcher.group(1); |
| ID id = null; |
| try { |
| id = ff.createBBObjectId(namespace, baseURL, idStr); |
| } catch (final IDCreateException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } |
| final Forum forum = (Forum) ff.createBBObject(id, name, null); |
| forum.setDescription(desc); |
| if (lastCat != null) { |
| lastCat.addSubForum(forum); |
| forum.setParent(lastCat); |
| } |
| forums.put(id, forum); |
| } |
| } |
| // Matched category |
| if (matcher.group(5) != null) { |
| final String name = StringUtil.stripHTMLTrim(matcher.group(5)); |
| if (StringUtil.notEmptyStr(name)) { |
| final CategoryFactory cf = new CategoryFactory(); |
| final String idStr = matcher.group(4); |
| ID id = null; |
| try { |
| id = cf.createBBObjectId(namespace, baseURL, idStr); |
| } catch (final NumberFormatException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } catch (final IDCreateException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } |
| lastCat = (Category) cf.createBBObject(id, name, null); |
| forums.put(id, lastCat); |
| } |
| } |
| } |
| return forums; |
| } |
| |
| @Override |
| public IBBObjectFactory getThreadFactory() { |
| return new ThreadFactory(); |
| } |
| |
| @Override |
| public IPatternDescriptor getThreadPattern() { |
| return DefaultPatternDescriptor.defaultCustom(Pattern.compile("<a href=\"viewtopic.php\\?t=([0-9]+)(?:.*?)\" class=\"topictitle\">(.*)</a>(?:.*?)<span class=\"name\">(.+?)</span>", Pattern.DOTALL), new String[] {"id", "name", "authorInfo"}); |
| } |
| |
| @Override |
| public IBBObjectFactory getMemberFactory() { |
| return new MemberFactory(); |
| } |
| |
| public Pattern getMemberNamePattern() { |
| return Pattern.compile("Viewing profile :: (.*?)</th>"); |
| } |
| |
| @Override |
| public IPatternDescriptor getAuthorInfoMemberPattern() { |
| return DefaultPatternDescriptor.defaultIdAndName(Pattern.compile("<a href=\"profile.php\\?mode=viewprofile&u=([0-9]+?)\">(.*?)</a>")); |
| } |
| |
| @Override |
| public IBBObjectFactory getGuestFactory() { |
| return new GuestFactory(); |
| } |
| |
| /* |
| * |
| * <table class="forumline" width="100%" cellspacing="1" cellpadding="4" |
| * border="0"> <tr> <th class="thHead" height="25"><b>Information</b></th> |
| * |
| * </tr> <tr> <td class="row1"><table width="100%" cellspacing="0" |
| * cellpadding="1" border="0"> <tr> <td> </td> </tr> <tr> |
| * <td align="center"><span class="gen">The topic or post you requested |
| * does not exist</span></td> |
| * |
| * </tr> <tr> <td> </td> </tr> </table></td> </tr> </table> |
| */ |
| public static final Pattern PAT_MSG_INFORMATION = Pattern.compile("<table class=\"forumline\"(?:.*?)" + "<th class=\"thHead\"(?:.*?)><b>Information</b></th>(?:.*?)" + "<td align=\"center\"><span class=\"gen\">(.*?)</span></td>", Pattern.DOTALL); |
| |
| public String parseInformationMessage(CharSequence seq) { |
| String msg = null; |
| final Matcher m = PAT_MSG_INFORMATION.matcher(seq); |
| if (m.find()) { |
| msg = "PHPBB: " + m.group(1); |
| } |
| return msg; |
| } |
| |
| private BBException createPHPBBException(String msg, CharSequence seq) { |
| final String phpBBmsg = parseInformationMessage(seq); |
| if (phpBBmsg != null) { |
| return new BBException(msg, new PHPBBException(phpBBmsg)); |
| } else { |
| return new BBException(msg); |
| } |
| } |
| |
| public static final Pattern PAT_THEAD_ATTRS = Pattern |
| // .compile("<title>(?:.*?) :: View topic - (.*?)</title>"); |
| .compile("<a class=\"maintitle\" href=\"viewtopic.php\\?t=([0-9]+)(?:.*?)\">(.*?)</a>"); |
| |
| public static final Pattern PAT_THEAD_ATTRS_FORUM = Pattern.compile("<link rel=\"up\" href=\"viewforum.php\\?f=([0-9]+?)\" title=\"(.*?)\" />"); |
| |
| public static final IPatternDescriptor PD_THREAD_ATTRS = DefaultPatternDescriptor.defaultIdAndName(PAT_THEAD_ATTRS); |
| |
| public static final IPatternDescriptor PD_THREAD_ATTRS_FORUM = DefaultPatternDescriptor.defaultIdAndName(PAT_THEAD_ATTRS_FORUM); |
| |
| public Thread parseThreadPageForThreadAttributes(CharSequence seq) throws BBException { |
| final Thread t = (Thread) genericParser.parseSingleIdName(PD_THREAD_ATTRS, seq, new ThreadFactory()); |
| if (t != null) { |
| final Forum f = (Forum) genericParser.parseSingleIdName(PD_THREAD_ATTRS_FORUM, seq, new ForumFactory()); |
| t.forum = f; |
| return t; |
| } else { |
| throw createPHPBBException("Failed to parse the thread.", seq); |
| } |
| } |
| |
| public static final Pattern PAT_MSG_TIMESTAMP = Pattern.compile("Posted: (.*?)<span class=\"gen\"> </span>"); |
| |
| public static final Pattern PAT_MSG = Pattern.compile("<tr>(?:.*?)<td width=\"150\" align=\"left\" valign=\"top\" class=\"row(?:[12]{1})\"><span class=\"name\">(.*?)<script language=\"JavaScript\"", Pattern.DOTALL); |
| |
| public static final Pattern PAT_MSG_USERID = Pattern.compile("profile.php\\?mode=viewprofile&u=([0-9]+)"); |
| |
| public static final Pattern PAT_MSG_POSTID_USERNAME = Pattern.compile("<a name=\"([0-9]+)\"></a><b>(.*?)</b></span>"); |
| |
| public static final Pattern PAT_MSG_TITLE = Pattern.compile("Post subject: (.*?)</span>"); |
| |
| // <td colspan="2"><span class="postbody">test</span><span |
| // class="gensmall"></span></td> |
| public static final Pattern PAT_MSG_MESSAGE = Pattern.compile("<td colspan=\"2\"><span class=\"postbody\">(.*?)</span><span class=\"gensmall\"></span></td>", Pattern.DOTALL); |
| |
| public String parseMessageId(String msgContent) { |
| final Matcher matcher = PAT_MSG_POSTID_USERNAME.matcher(msgContent); |
| if (matcher.find()) { |
| return new String(matcher.group(1)); |
| } |
| return null; |
| } |
| |
| public void parseMessage(final CharSequence seq) { |
| /* |
| * String username = null; Matcher matcher; // Match date |
| * //message.setTime(parseTimestamp(str)); // Match user id and name |
| * matcher = PAT_MSG_POSTID_USERNAME.matcher(seq); if (matcher.find()) { |
| * username = StringUtil.simpleStripHTML(matcher.group(2)); |
| * message.setId(matcher.group(1)); } matcher = |
| * PAT_MSG_USERID.matcher(str); if (matcher.find()) { |
| * message.setAuthor(app.userFor(matcher.group(1), username)); } else { |
| * message.setAuthor(app.userFor(null, username)); } // Match title |
| * matcher = PAT_MSG_TITLE.matcher(seq); if (matcher.find()) { |
| * message.setTitle(matcher.group(1)); } // Match message matcher = |
| * PAT_MSG_MESSAGE.matcher(seq); if (matcher.find()) { |
| * message.setMessage(StringUtil.stripHTMLFullTrim(matcher.group(1))); } |
| */ |
| } |
| |
| public ThreadMessage parseRequestedMessage(final ThreadMessageID id, final CharSequence seq) throws BBException { |
| final ThreadMessageFactory tmf = new ThreadMessageFactory(); |
| // lastRead = -1 the one we want |
| ThreadMessageID lastReadId = null; |
| try { |
| lastReadId = (ThreadMessageID) tmf.createBBObjectId(namespace, baseURL, String.valueOf(id.getLongValue() - 1)); |
| } catch (final IDCreateException e) { |
| e.printStackTrace(); |
| } |
| final List<ThreadMessage> msgs = parseMessages2(seq, lastReadId, true); |
| if (msgs.size() > 0) { |
| return msgs.get(0); |
| } |
| return null; |
| } |
| |
| public List<ThreadMessage> parseMessages2(final CharSequence seq, final ThreadMessageID lastReadId, boolean desc) throws BBException { |
| Matcher m; |
| ThreadMessage msg; |
| final List<ThreadMessage> messages = new ArrayList<ThreadMessage>(); |
| m = PAT_MSG.matcher(seq); |
| while (m.find()) { |
| final String msgSrc = m.group(1); |
| msg = parseMessage2(msgSrc, lastReadId); |
| if (msg != null) { |
| if (desc) { |
| messages.add(0, msg); |
| } else { |
| messages.add(msg); |
| } |
| } |
| } |
| return messages; |
| } |
| |
| @Override |
| public Long parseTimestamp(CharSequence seq) { |
| Long l = null; |
| final Locale locale = Locale.ENGLISH; |
| final String dateFormat = "EEE MMM d, yyyy"; |
| final String timeFormat = "h:mm aa"; |
| final String dateTimeSeparator = " "; |
| final DateFormat fmtTimestamp = new SimpleDateFormat(dateFormat + dateTimeSeparator + timeFormat, locale); |
| final DateFormat fmtTime = new SimpleDateFormat(timeFormat, locale); |
| final String timestamp = new StringBuilder(seq).toString(); |
| /* |
| * timestamp = timestamp.replaceAll("1st", "1"); timestamp = |
| * timestamp.replaceAll("2nd", "2"); timestamp = |
| * timestamp.replaceAll("3rd", "3"); timestamp = |
| * timestamp.replaceAll("th", ""); |
| */ |
| if (timestamp.startsWith("Today") || timestamp.startsWith("Yesterday")) { |
| final String[] s = timestamp.split(dateTimeSeparator); |
| try { |
| final Calendar now = Calendar.getInstance(/* fmtTime.getTimeZone() */); |
| if ("Yesterday".equals(s[0])) { |
| now.add(Calendar.DATE, -1); |
| } |
| |
| final Date d = fmtTime.parse(s[1]); |
| final Calendar then = Calendar.getInstance(fmtTime.getTimeZone()); |
| then.setTime(d); |
| then.set(now.get(Calendar.YEAR), now.get(Calendar.MONTH), now.get(Calendar.DATE)); |
| l = new Long(then.getTimeInMillis()); |
| } catch (final ParseException e) { |
| e.printStackTrace(); |
| } |
| } else { |
| try { |
| l = new Long(fmtTimestamp.parse(timestamp).getTime()); |
| } catch (final ParseException e) { |
| e.printStackTrace(); |
| } |
| } |
| return l; |
| } |
| |
| private ThreadMessage parseMessage2(final CharSequence seq, final ThreadMessageID lastReadId) { |
| ThreadMessage msg = null; |
| Matcher m; |
| m = PAT_MSG_POSTID_USERNAME.matcher(seq); |
| if (m.find()) { |
| final ThreadMessageFactory tmf = new ThreadMessageFactory(); |
| String idStr = m.group(1); |
| ThreadMessageID id = null; |
| try { |
| id = (ThreadMessageID) tmf.createBBObjectId(namespace, baseURL, idStr); |
| } catch (final IDCreateException e1) { |
| // TODO Auto-generated catch block |
| e1.printStackTrace(); |
| } |
| if (lastReadId == null || id.getLongValue() > lastReadId.getLongValue()) { |
| final String uname = new String(m.group(2)); |
| msg = new ThreadMessage(id, null); |
| m = PAT_MSG_TIMESTAMP.matcher(seq); |
| if (m.find()) { |
| msg.timePosted = new Date(parseTimestamp(new String(m.group(1))).longValue()); |
| } |
| m = PAT_MSG_TITLE.matcher(seq); |
| m.find(); |
| msg.setNameInternal(new String(m.group(1))); |
| m = PAT_MSG_MESSAGE.matcher(seq); |
| m.find(); |
| final String message = StringUtil.stripHTMLFullTrim(m.group(1)); |
| msg.message = message; |
| m = PAT_MEMBER_ID_FROM_LINK.matcher(seq); |
| if (m.find()) { |
| final MemberFactory mf = new MemberFactory(); |
| idStr = m.group(1); |
| ID id2 = null; |
| try { |
| id2 = mf.createBBObjectId(namespace, baseURL, idStr); |
| } catch (final NumberFormatException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } catch (final IDCreateException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } |
| msg.author = new Member(id2, uname); |
| } else { |
| final GuestFactory gf = new GuestFactory(); |
| ID id2 = null; |
| try { |
| id2 = gf.createBBObjectId(namespace, baseURL, null); |
| } catch (final IDCreateException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } |
| msg.author = new Member(id2, uname); |
| } |
| } |
| } |
| return msg; |
| } |
| |
| public Map<ID, ThreadMessage> parseMessages(final CharSequence seq, final boolean newOnly) throws BBException { |
| Matcher matcher; |
| String title; |
| ThreadMessage msg; |
| final Map<ID, ThreadMessage> messages = new HashMap<ID, ThreadMessage>(); |
| matcher = PAT_MSG.matcher(seq); |
| boolean anyFound = false; |
| while (matcher.find()) { |
| anyFound = true; |
| title = StringUtil.stripHTMLTrim(matcher.group(3)); |
| if (StringUtil.notEmptyStr(title)) { |
| final ThreadMessageFactory tmf = new ThreadMessageFactory(); |
| final String idStr = matcher.group(1); |
| ID id = null; |
| try { |
| id = tmf.createBBObjectId(namespace, baseURL, idStr); |
| } catch (final NumberFormatException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } catch (final IDCreateException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } |
| msg = (ThreadMessage) tmf.createBBObject(id, title, null); |
| messages.put(id, msg); |
| } |
| } |
| if (!anyFound) { |
| throw createPHPBBException("No messages found!", seq); |
| } |
| return messages; |
| } |
| |
| public static final Pattern PAT_PAGES = Pattern.compile("<span class=\"nav\">Page <b>([0-9]+)</b> of <b>([0-9]+)</b></span>"); |
| |
| public int parseNextPage(CharSequence seq) { |
| final Matcher m = PAT_PAGES.matcher(seq); |
| int next = -1; |
| if (m.find()) { |
| final int current = Integer.parseInt(m.group(1)); |
| final int last = Integer.parseInt(m.group(2)); |
| if (current < last) { |
| next = current + 1; |
| } |
| } |
| return next; |
| } |
| |
| public int parsePrevPage(CharSequence seq) { |
| final Matcher m = PAT_PAGES.matcher(seq); |
| int prev = -1; |
| if (m.find()) { |
| final int current = Integer.parseInt(m.group(1)); |
| if (current > 1) { |
| prev = current - 1; |
| } |
| } |
| return prev; |
| } |
| |
| /* |
| * <tr> <td class="row1" width="20%"><span class="gen">Group name:</span></td> |
| * <td class="row2"><span class="gen"><b>Zerobot</b></span></td> |
| * |
| * </tr> <tr> <td class="row1" width="20%"><span class="gen">Group |
| * description:</span></td> <td class="row2"><span class="gen">Zerobot |
| * identities</span></td> </tr> |
| */ |
| public static final Pattern PAT_GROUP = Pattern.compile("<form action=\"groupcp.php\\?g=([0-9]+?)\" method=\"post\">" + "(?:.*?)<tr>" + "(?:.*?)<td class=\"row1\"(?:.*?)><span class=\"gen\">Group name:</span></td>" + "(?:.*?)<td class=\"row2\"(?:.*?)><span class=\"gen\">(.*?)</span></td>" + "(?:.*?)</tr>" + "(?:.*?)<tr>" + "(?:.*?)<td class=\"row1\"(?:.*?)><span class=\"gen\">Group description:</span></td>" + "(?:.*?)<td class=\"row2\"(?:.*?)><span class=\"gen\">(.*?)</span></td>" + "(?:.*?)</tr>" + "(?:.*?)</form>", Pattern.DOTALL); |
| |
| public MemberGroup parseMemberGroup(CharSequence seq) { |
| final Matcher m = PAT_GROUP.matcher(seq); |
| if (m.find()) { |
| final MemberGroupFactory mgf = new MemberGroupFactory(); |
| final String idStr = m.group(1); |
| final String name = StringUtil.stripHTMLTrim(m.group(2)); |
| final String desc = StringUtil.stripHTMLTrim(m.group(3)); |
| ID id = null; |
| try { |
| id = mgf.createBBObjectId(namespace, baseURL, idStr); |
| } catch (final IDCreateException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } |
| final MemberGroup grp = (MemberGroup) mgf.createBBObject(id, name, null); |
| grp.setDescription(desc); |
| return grp; |
| } |
| return null; |
| } |
| |
| /* |
| * <select name="g"><option value="7">Unit Test Group</option><option |
| * value="4">Zerobot</option></select> TODO this didn't work for several |
| * groups, so I split into two patterns |
| */ |
| public static final Pattern PAT_GROUPS = Pattern.compile("<select name=\"g\">" + "(?:<option value=\"([0-9]+?)\">(.*?)</option>?)" + "</select>"); |
| |
| public static final Pattern PAT_GROUPS_GROUP = Pattern.compile("<option value=\"([0-9]+?)\">(.*?)</option>"); |
| |
| @Override |
| public IBBObjectFactory getMemberGroupFactory() { |
| return new MemberGroupFactory(); |
| } |
| |
| public Pattern getMemberGroupContainerPattern() { |
| return Pattern.compile("<select name=\"g\">" + "(?:<option value=\"([0-9]+?)\">(.*?)</option>?)" + "</select>"); |
| } |
| |
| public IPatternDescriptor getMemberGroupPattern() { |
| return DefaultPatternDescriptor.defaultIdAndName(Pattern.compile("<option value=\"([0-9]+?)\">(.*?)</option>")); |
| } |
| |
| @Deprecated |
| private static final Pattern PAT_MEMBER_ID_FROM_LINK = Pattern.compile("<a href=\"profile.php\\?mode=viewprofile&u=([0-9]+?)\">"); |
| |
| public IPatternDescriptor getMemberPattern() { |
| return DefaultPatternDescriptor.defaultIdAndName(Pattern.compile("<a href=\"profile.php\\?mode=viewprofile&u=([0-9]+?)\" class=\"gen\">(.*?)</a>")); |
| } |
| |
| private static final Pattern PAT_TITLE = Pattern.compile("<title>(.*?)</title>"); |
| |
| public String parseTitle(CharSequence seq) { |
| final Matcher m = PAT_TITLE.matcher(seq); |
| if (m.find()) { |
| final String title = new String(m.group(1)); |
| return title; |
| } |
| return null; |
| } |
| |
| @Override |
| public void throwException(final String msg, final CharSequence seq) throws BBException { |
| throw createPHPBBException(msg, seq); |
| } |
| |
| } |