blob: e62158596e11aa7ce3c0c02e42e1f818dfb2db29 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2003 - 2006 University Of British Columbia and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* University Of British Columbia - initial API and implementation
*******************************************************************************/
package org.eclipse.mylyn.internal.bugzilla.core.internal;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import javax.security.auth.login.LoginException;
import org.eclipse.core.runtime.IStatus;
import org.eclipse.core.runtime.Status;
import org.eclipse.mylyn.bugzilla.core.AbstractRepositoryReportAttribute;
import org.eclipse.mylyn.bugzilla.core.BugReport;
import org.eclipse.mylyn.bugzilla.core.Comment;
import org.eclipse.mylyn.bugzilla.core.Operation;
import org.eclipse.mylyn.internal.bugzilla.core.BugzillaPlugin;
import org.eclipse.mylyn.internal.bugzilla.core.IBugzillaConstants;
import org.eclipse.mylyn.internal.bugzilla.core.internal.HtmlStreamTokenizer.Token;
/**
* Parses the HTML of a Bugzilla bug page into a bug report so that it can be
* displayed using the bug editor.
*
* @author Shawn Minto
* @author Mik Kersten (hardening of prototype)
* @author Rob Elves (attachments)
*/
public class BugParser {
/** Value of a span's class attribute that marks the enclosed attachment as obsolete */
private static final String VALUE_ATTACHMENT_OBSOLETE = "bz_obsolete";
/** Name of the HTML attribute inspected on span tags inside a comment */
private static final String ATTRIBUTE_CLASS = "class";
/** Tag name of the element that wraps an attachment reference inside a comment */
private static final String TAG_SPAN = "span";
/** Name of the anchor attribute read as the attachment description */
private static final String ATTRIBUTE_ID_TITLE = "title";
/** Name of the anchor attribute holding the link target */
private static final String ATTRIBUTE_ID_HREF = "href";
/** Prefix of an attachment link; the numeric attachment id follows it */
private static final String ATTACHMENT_CGI_ID = "attachment.cgi?id=";
/** Label of the bug number attribute */
private static final String KEY_BUG_NUM = "Bug#";
/** Name of the resolution attribute, which gets special handling when its cell is empty */
private static final String KEY_RESOLUTION = "resolution";
/** Name of the HTML attribute holding a form control's value */
private static final String KEY_VALUE = "value";
/** Name of the HTML attribute holding a form control's name */
private static final String KEY_NAME = "name";
/** Name of the charset parameter in a content-type string */
private static final String ATTR_CHARSET = "charset";
/**
* Parser for dates in the report.
* NOTE(review): SimpleDateFormat is not thread-safe and this instance is
* shared by the static parse methods - confirm callers never parse
* concurrently.
*/
private static SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm");
/** Page, relative to the server URL, that is fetched to obtain the keyword list */
private static final String keywordsUrl = "describekeywords.cgi";
/**
 * Reads the label of an attribute from the current table cell.
 * <p>
 * The cell text is collected with {@code parseTableCell}, HTML-unescaped, and
 * a single trailing colon (if present) is dropped.
 *
 * @param tokenizer
 *            the tokenizer positioned just after the cell's opening tag
 * @return the attribute label without a trailing colon
 * @throws IOException
 * @throws ParseException
 */
private static String parseAttributeName(HtmlStreamTokenizer tokenizer) throws IOException, ParseException {
    StringBuffer label = new StringBuffer();
    parseTableCell(tokenizer, label);
    HtmlStreamTokenizer.unescape(label);

    int lastIndex = label.length() - 1;
    boolean endsWithColon = lastIndex >= 0 && label.charAt(lastIndex) == ':';
    return endsWithColon ? label.substring(0, lastIndex) : label.toString();
}
/**
 * Parses the value cell of an attribute and stores the result on the bug.
 * <p>
 * The first token of the cell decides how the value is read: a SELECT is
 * handed to {@code parseSelect}, an INPUT to {@code parseInput}, and anything
 * else is read as plain cell text. A leading LABEL tag is skipped first.
 *
 * @param bug
 *            the bug report that receives the attribute
 * @param originalAttributeName
 *            the attribute label as parsed from the name cell
 * @param tokenizer
 *            the tokenizer positioned just after the value cell's opening tag
 * @param serverUrl
 *            passed through to {@code parseInput}
 * @param userName
 *            passed through to {@code parseInput}
 * @param password
 *            passed through to {@code parseInput}
 * @throws IOException
 * @throws ParseException
 */
private static void parseAttributeValue(BugReport bug, String originalAttributeName, HtmlStreamTokenizer tokenizer,
        String serverUrl, String userName, String password) throws IOException, ParseException {
    // NOTE: special rule to deal with change in 2.20.1: a label that merely
    // ends with "Bug#" is reduced to exactly that suffix
    String attributeName = originalAttributeName;
    if (attributeName.endsWith(KEY_BUG_NUM) && attributeName.length() > KEY_BUG_NUM.length()) {
        attributeName = originalAttributeName.substring(attributeName.length() - KEY_BUG_NUM.length(),
                attributeName.length());
    }

    Token token = tokenizer.nextToken();
    if (token.getType() != Token.TAG) {
        // the cell starts with plain text; anything else (e.g. EOF) is ignored
        if (token.getType() == Token.TEXT) {
            StringBuffer sb = new StringBuffer();
            sb.append((StringBuffer) token.getValue());
            parseAttributeValueCell(bug, attributeName, tokenizer, sb);
        }
        return;
    }

    HtmlTag tag = (HtmlTag) token.getValue();
    // make sure that we are on a tag that we care about, not a label
    // fix added so that we can parse the mozilla bug pages
    if (tag.getTagType() == HtmlTag.Type.LABEL) {
        token = tokenizer.nextToken();
        if (token.getType() == Token.TAG) {
            tag = (HtmlTag) token.getValue();
        } else if (token.getType() == Token.TEXT) {
            StringBuffer sb = new StringBuffer();
            sb.append((StringBuffer) token.getValue());
            parseAttributeValueCell(bug, attributeName, tokenizer, sb);
            // The cell has been consumed. Previously control fell through and
            // the stale LABEL tag caused a second cell to be parsed and stored
            // under the same attribute name.
            return;
        }
    }

    if (tag.getTagType() == HtmlTag.Type.SELECT && !tag.isEndTag()) {
        String parameterName = tag.getAttribute(KEY_NAME);
        parseSelect(bug, attributeName, parameterName, tokenizer);
    } else if (tag.getTagType() == HtmlTag.Type.INPUT && !tag.isEndTag()) {
        parseInput(bug, attributeName, tag, serverUrl, userName, password);
    } else if (!tag.isEndTag() || attributeName.equalsIgnoreCase(KEY_RESOLUTION)) {
        if (tag.isEndTag() && attributeName.equalsIgnoreCase(KEY_RESOLUTION)) {
            // an end tag right away means the resolution cell is empty; record
            // an empty value before reading on
            AbstractRepositoryReportAttribute a = new AbstractRepositoryReportAttribute(attributeName);
            a.setValue("");
            bug.addAttribute(a);
        }
        parseAttributeValueCell(bug, attributeName, tokenizer);
    }
}
/**
 * Parses an attribute whose value is plain text in a table cell, starting
 * from an empty buffer.
 *
 * @param bug
 *            the bug report that receives the attribute
 * @param attributeName
 *            the name of the attribute being parsed
 * @param tokenizer
 *            the tokenizer positioned inside the value cell
 * @throws IOException
 * @throws ParseException
 */
private static void parseAttributeValueCell(BugReport bug, String attributeName, HtmlStreamTokenizer tokenizer)
        throws IOException, ParseException {
    parseAttributeValueCell(bug, attributeName, tokenizer, new StringBuffer());
}
private static void parseAttributeValueCell(BugReport bug, String attributeName, HtmlStreamTokenizer tokenizer,
StringBuffer sb) throws IOException, ParseException {
parseTableCell(tokenizer, sb);
HtmlStreamTokenizer.unescape(sb);
// create a new attribute and set its value to the value that we
// retrieved
AbstractRepositoryReportAttribute a = new AbstractRepositoryReportAttribute(attributeName);
a.setValue(sb.toString());
// if we found an attachment attribute, forget about it, else add the
// attribute to the bug report
if (attributeName.toLowerCase()).startsWith("attachments")) {
// do nothing
} else {
if (attributeName.equals(KEY_BUG_NUM))
a.setValue(a.getValue().replaceFirst("alias:", ""));
bug.addAttribute(a);
}
}
/**
 * Appends the text of the current table cell to {@code sb}.
 * <p>
 * Reading stops at the first TD tag or at end of input. A closing TD is
 * consumed; an opening TD (the start of the next cell) is pushed back onto
 * the tokenizer. <b>NOTE:</b> Does not handle nested cells!
 *
 * @param tokenizer
 *            the tokenizer to read from
 * @param sb
 *            buffer the cell text is appended to; existing content is kept
 * @throws IOException
 * @throws ParseException
 */
private static void parseTableCell(HtmlStreamTokenizer tokenizer, StringBuffer sb) throws IOException,
        ParseException {
    // true when the most recent non-TD tag had no whitespace in front of it
    boolean lastTagWasTight = false;
    Token current = tokenizer.nextToken();
    while (current.getType() != Token.EOF) {
        if (current.getType() == Token.TAG) {
            HtmlTag htmlTag = (HtmlTag) current.getValue();
            if (htmlTag.getTagType() == HtmlTag.Type.TD) {
                if (!htmlTag.isEndTag()) {
                    tokenizer.pushback(current);
                }
                return;
            }
            lastTagWasTight = current.getWhitespace().length() == 0;
        } else if (current.getType() == Token.TEXT) {
            // only separate with a blank when the source had whitespace here
            // and the preceding tag was not glued to its predecessor
            boolean needsSeparator = !lastTagWasTight && current.getWhitespace().length() > 0
                    && sb.length() > 0;
            if (needsSeparator) {
                sb.append(' ');
            }
            sb.append((StringBuffer) current.getValue());
        }
        current = tokenizer.nextToken();
    }
}
/**
 * Parses a SELECT element into an attribute with one option value per OPTION
 * tag.
 * <p>
 * The text of an option carrying a "selected" attribute becomes the
 * attribute's value. If the attribute name starts with "cc"
 * (case-insensitive) the option texts are added to the bug's CC list instead
 * of storing the attribute.
 *
 * @param bug
 *            the bug report for the current bug
 * @param attributeName
 *            the name of the attribute being parsed
 * @param parameterName
 *            the SELECT tag's name, used as the attribute id
 * @param tokenizer
 *            the tokenizer positioned just after the opening SELECT tag
 * @throws IOException
 * @throws ParseException
 */
private static void parseSelect(BugReport bug, String attributeName, String parameterName,
        HtmlStreamTokenizer tokenizer) throws IOException, ParseException {
    AbstractRepositoryReportAttribute a = new AbstractRepositoryReportAttribute(attributeName);
    a.setID(parameterName);

    Token token = tokenizer.nextToken();
    while (token.getType() != Token.EOF) {
        if (token.getType() != Token.TAG) {
            token = tokenizer.nextToken();
            continue;
        }
        HtmlTag tag = (HtmlTag) token.getValue();
        if (tag.getTagType() == HtmlTag.Type.SELECT && tag.isEndTag()) {
            break;
        }
        boolean opensOption = tag.getTagType() == HtmlTag.Type.OPTION && !tag.isEndTag();
        if (!opensOption) {
            token = tokenizer.nextToken();
            continue;
        }

        String optionName = tag.getAttribute(KEY_VALUE);
        boolean selected = tag.hasAttribute("selected");

        // gather the option's text; the first non-text token ends it and is
        // examined by the next iteration of the outer loop
        StringBuffer optionText = new StringBuffer();
        token = tokenizer.nextToken();
        while (token.getType() == Token.TEXT) {
            if (optionText.length() > 0) {
                optionText.append(' ');
            }
            optionText.append((StringBuffer) token.getValue());
            token = tokenizer.nextToken();
        }

        a.addOptionValue(optionText.toString(), optionName);
        // only an explicitly selected option sets the value; there is no
        // fallback to the first option
        if (selected) {
            a.setValue(optionText.toString());
        }
    }

    // if we parsed the cc field add the e-mails to the bug report else add
    // the attribute to the bug report
    if (!attributeName.toLowerCase().startsWith("cc")) {
        bug.addAttribute(a);
        return;
    }
    Iterator<String> emails = a.getOptionValues().keySet().iterator();
    while (emails.hasNext()) {
        bug.addCC(HtmlStreamTokenizer.unescape(emails.next()));
    }
}
/**
 * Parses an INPUT element of the bug form.
 * <p>
 * The "short_desc" input becomes the bug's summary. Every other input is
 * stored as an attribute whose id is the input's name and whose value is the
 * input's value ("" when the tag has none). For the keywords attribute the
 * list of valid keywords is additionally fetched from the server.
 *
 * @param bug
 *            the bug report for the current bug
 * @param attributeName
 *            the name of the attribute
 * @param tag
 *            the INPUT tag
 * @param serverUrl
 *            base URL of the Bugzilla server; keywords are only fetched when
 *            this is not null
 * @param userName
 *            login sent with the keyword request when both it and the
 *            password are non-empty
 * @param password
 *            password sent with the keyword request
 * @throws IOException
 *             if the keyword list could not be fetched
 */
private static void parseInput(BugReport bug, String attributeName, HtmlTag tag, String serverUrl, String userName,
        String password) throws IOException {
    String name = tag.getAttribute(KEY_NAME);
    String value = tag.getAttribute(KEY_VALUE);
    if (value == null) {
        value = "";
    }

    // literal-first comparisons: an INPUT without a name attribute used to
    // cause a NullPointerException here
    if ("short_desc".equalsIgnoreCase(name)) {
        // if we found the summary, add it to the bug report
        bug.setSummary(value);
        return;
    }

    AbstractRepositoryReportAttribute a = new AbstractRepositoryReportAttribute(attributeName);
    a.setID(name);
    a.setValue(value);
    bug.addAttribute(a);

    // the URL and new-CC inputs never trigger the keyword lookup
    boolean isUrlOrNewCc = "bug_file_loc".equalsIgnoreCase(name) || "newcc".equalsIgnoreCase(name);
    if (!isUrlOrNewCc && attributeName.equalsIgnoreCase("keywords") && serverUrl != null) {
        loadKeywords(bug, serverUrl, userName, password);
    }
}

/**
 * Fetches the list of valid keywords from the server and sets it on the bug.
 */
private static void loadKeywords(BugReport bug, String serverUrl, String userName, String password)
        throws IOException {
    BufferedReader input = null;
    try {
        String urlText = "";
        // if we have a user name, may as well log in just in case it is
        // required
        // NOTE(review): the credentials travel in the query string, where
        // servers and proxies commonly log them; consider a POST instead.
        if (userName != null && !userName.equals("") && password != null && !password.equals("")) {
            /*
             * The UnsupportedEncodingException exception for
             * URLEncoder.encode() should not be thrown, since every
             * implementation of the Java platform is required to support the
             * standard charset "UTF-8"
             */
            urlText += "?GoAheadAndLogIn=1&Bugzilla_login=" + URLEncoder.encode(userName, "UTF-8")
                    + "&Bugzilla_password=" + URLEncoder.encode(password, "UTF-8");
        }

        // connect to the bugzilla server to get the keyword list
        URL url = new URL(serverUrl + "/" + keywordsUrl + urlText);
        URLConnection urlConnection = BugzillaPlugin.getDefault().getUrlConnection(url);
        input = new BufferedReader(new InputStreamReader(urlConnection.getInputStream()));

        // parse the valid keywords and add them to the bug
        List<String> keywords = new KeywordParser(input).getKeywords();
        bug.setKeywords(keywords);
    } catch (Exception e) {
        // report any failure as an IOException, keeping the original
        // exception as the cause so the stack trace is not lost
        IOException wrapped = new IOException("Exception while fetching the list of keywords from the server: "
                + e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        try {
            if (input != null) {
                input.close();
            }
        } catch (IOException e) {
            BugzillaPlugin.log(new Status(IStatus.ERROR, IBugzillaConstants.PLUGIN_ID, IStatus.ERROR,
                    "Problem closing the stream", e));
        }
    }
}
/**
 * Reads the bug's description up to the closing PRE tag and stores it,
 * HTML-unescaped, on the bug.
 * <p>
 * Whitespace that preceded a tag or text token is preserved once the buffer
 * is non-empty; tags themselves contribute no text.
 *
 * @param bug
 *            the bug report that receives the description
 * @param tokenizer
 *            the tokenizer positioned just after the opening PRE tag
 * @throws IOException
 * @throws ParseException
 */
private static void parseDescription(BugReport bug, HtmlStreamTokenizer tokenizer) throws IOException,
        ParseException {
    StringBuffer description = new StringBuffer();
    Token current = tokenizer.nextToken();
    while (current.getType() != Token.EOF) {
        boolean isTag = current.getType() == Token.TAG;
        boolean isText = current.getType() == Token.TEXT;

        // keep the source's whitespace in front of tags and text alike
        if ((isTag || isText) && description.length() > 0) {
            description.append(current.getWhitespace());
        }

        if (isTag) {
            HtmlTag htmlTag = (HtmlTag) current.getValue();
            if (htmlTag.getTagType() == HtmlTag.Type.PRE && htmlTag.isEndTag()) {
                break;
            }
        } else if (isText) {
            description.append((StringBuffer) current.getValue());
        }
        current = tokenizer.nextToken();
    }

    // set the bug to have the description we retrieved
    bug.setDescription(HtmlStreamTokenizer.unescape(description).toString());
}
// /**
// * parses the description of an attachment on the report
// */
// private static String parseAttachementDescription(HtmlStreamTokenizer
// tokenizer) throws IOException,
// ParseException {
//
// StringBuffer sb = new StringBuffer();
// for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF;
// token = tokenizer.nextToken()) {
// if (token.getType() == Token.TAG) {
// HtmlTag tag = (HtmlTag) token.getValue();
// if (tag.getTagType() == HtmlTag.Type.A && tag.isEndTag())
// break;
// } else if (token.getType() == Token.TEXT) {
// if (sb.length() > 0) {
// sb.append(token.getWhitespace());
// }
// sb.append((StringBuffer) token.getValue());
// }
// }
//
// // set the bug to have the description we retrieved
// String text = HtmlStreamTokenizer.unescape(sb).toString();
// return text;
// }
/**
 * Parses the header of a comment: its number, the author's e-mail address
 * and real name, and the date it was written.
 * <p>
 * The four pieces are read in that order, each by scanning forward from
 * where the previous scan stopped.
 *
 * @param bug
 *            the bug report the comment belongs to
 * @param tokenizer
 *            the tokenizer positioned at the start of the comment header
 * @return a new comment carrying the parsed header information
 * @throws IOException
 * @throws ParseException
 */
private static Comment parseCommentHead(BugReport bug, HtmlStreamTokenizer tokenizer) throws IOException,
        ParseException {
    // comment number: the text following the first anchor whose href
    // contains "#c", minus its first character
    int number = 0;
    for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) {
        if (token.getType() != Token.TAG) {
            continue;
        }
        HtmlTag anchor = (HtmlTag) token.getValue();
        if (anchor.getTagType() != HtmlTag.Type.A) {
            continue;
        }
        String href = anchor.getAttribute(ATTRIBUTE_ID_HREF);
        if (href == null || href.toLowerCase().indexOf("#c") == -1) {
            continue;
        }
        Token label = tokenizer.nextToken();
        number = Integer.parseInt(((StringBuffer) label.getValue()).toString().substring(1));
        break;
    }

    // author address: whatever follows "mailto" plus one more character in
    // the next matching anchor's href
    String author = null;
    for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) {
        if (token.getType() != Token.TAG) {
            continue;
        }
        HtmlTag anchor = (HtmlTag) token.getValue();
        if (anchor.getTagType() != HtmlTag.Type.A) {
            continue;
        }
        String href = anchor.getAttribute(ATTRIBUTE_ID_HREF);
        if (href == null) {
            continue;
        }
        int mailto = href.toLowerCase().indexOf("mailto");
        if (mailto == -1) {
            continue;
        }
        author = href.substring(mailto + 7);
        break;
    }

    // author's real name: the text up to the closing anchor tag
    StringBuffer text = new StringBuffer();
    Token current = tokenizer.nextToken();
    while (current.getType() != Token.EOF) {
        if (current.getType() == Token.TAG) {
            HtmlTag htmlTag = (HtmlTag) current.getValue();
            if (htmlTag.getTagType() == HtmlTag.Type.A && htmlTag.isEndTag()) {
                break;
            }
        } else if (current.getType() == Token.TEXT) {
            if (text.length() > 0) {
                text.append(' ');
            }
            text.append((StringBuffer) current.getValue());
        }
        current = tokenizer.nextToken();
    }
    String authorName = text.toString();

    // comment date: the text up to the closing I tag
    text.setLength(0);
    current = tokenizer.nextToken();
    while (current.getType() != Token.EOF) {
        if (current.getType() == Token.TAG) {
            HtmlTag htmlTag = (HtmlTag) current.getValue();
            if (htmlTag.getTagType() == HtmlTag.Type.I && htmlTag.isEndTag()) {
                break;
            }
        } else if (current.getType() == Token.TEXT) {
            if (text.length() > 0) {
                text.append(' ');
            }
            text.append((StringBuffer) current.getValue());
        }
        current = tokenizer.nextToken();
    }

    // only the first 16 characters ("yyyy-MM-dd HH:mm") are parsed; shorter
    // text leaves the date null, a parse failure falls back to "now"
    Date date = null;
    try {
        if (text.length() >= 16) {
            date = df.parse(text.substring(0, 16));
        }
    } catch (Exception e) {
        // XXX: could not determine date
        date = Calendar.getInstance().getTime();
    }

    return new Comment(bug, number, date, author, authorName);
}
/**
 * Reads the text of a comment up to the closing PRE tag, sets it on the
 * comment and adds the comment to the bug.
 * <p>
 * A span whose class attribute is empty or equals the obsolete marker is
 * handed to {@code parseAttachment}. Once the comment has an attachment with
 * a non-empty description, text collected so far is discarded whenever it
 * starts with that description, so the description does not end up in the
 * comment body.
 *
 * @param bug
 *            the bug report for the current bug
 * @param comment
 *            the comment to add the text to
 * @param tokenizer
 *            the tokenizer positioned just after the opening PRE tag
 * @throws IOException
 * @throws ParseException
 */
private static void parseCommentText(BugReport bug, Comment comment, HtmlStreamTokenizer tokenizer)
        throws IOException, ParseException {
    StringBuffer commentStringBuffer = new StringBuffer();
    for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) {
        if (token.getType() == Token.TAG) {
            HtmlTag tag = (HtmlTag) token.getValue();
            if (TAG_SPAN.equals(tag.getTagName()) && tag.hasAttribute(ATTRIBUTE_CLASS)) {
                String cssClass = tag.getAttribute(ATTRIBUTE_CLASS);
                boolean obsolete = VALUE_ATTACHMENT_OBSOLETE.equals(cssClass);
                if (obsolete || "".equals(cssClass)) {
                    parseAttachment(commentStringBuffer, comment, tokenizer, obsolete);
                    continue;
                }
            }
            // added to ensure whitespace is not
            // lost if adding a tag within a tag
            if (commentStringBuffer.length() > 0) {
                commentStringBuffer.append(token.getWhitespace());
            }
            if (tag.getTagType() == HtmlTag.Type.PRE && tag.isEndTag()) {
                break;
            }
        } else if (token.getType() == Token.TEXT) {
            if (commentStringBuffer.length() > 0) {
                commentStringBuffer.append(token.getWhitespace());
            }
            commentStringBuffer.append((StringBuffer) token.getValue());
        }

        // remove attachment description from comment body
        if (comment.hasAttachment()) {
            String description = comment.getAttachmentDescription();
            // An empty description matches at index 0 of any buffer, which
            // used to wipe the comment text after every token; a null
            // description used to throw. Both are skipped now.
            if (description != null && description.length() > 0
                    && commentStringBuffer.indexOf(description) == 0) {
                commentStringBuffer = new StringBuffer();
            }
        }
    }
    HtmlStreamTokenizer.unescape(commentStringBuffer);
    comment.setText(commentStringBuffer.toString());
    bug.addComment(comment);
}
/**
 * Scans the contents of an attachment span and records the attachment on the
 * comment.
 * <p>
 * Anchors are inspected until the comment has an attachment: an href that
 * starts with the attachment link prefix and contains an ampersand supplies
 * the id, and a title attribute supplies the description. Scanning ends at
 * the closing span tag, at end of input, or immediately when the id is not a
 * number.
 *
 * @param stringBuffer
 *            not used by this method
 * @param comment
 *            the comment that receives the attachment information
 * @param tokenizer
 *            the tokenizer positioned just after the opening span tag
 * @param obsolete
 *            whether the attachment is to be flagged as obsolete
 * @throws IOException
 * @throws ParseException
 */
private static void parseAttachment(StringBuffer stringBuffer, Comment comment, HtmlStreamTokenizer tokenizer, boolean obsolete)
        throws IOException, ParseException {
    // both values survive from one anchor to the next
    int attachmentID = -1;
    String attachmentDescription = "";

    Token current = tokenizer.nextToken();
    while (current.getType() != Token.EOF) {
        if (current.getType() == Token.TAG) {
            HtmlTag htmlTag = (HtmlTag) current.getValue();

            String link = null;
            if (htmlTag.getTagType() == HtmlTag.Type.A && !comment.hasAttachment()) {
                link = htmlTag.getAttribute(ATTRIBUTE_ID_HREF);
            }
            if (link != null) {
                if (link.startsWith(ATTACHMENT_CGI_ID)) {
                    // the id is the part between the prefix and the first '&'
                    int endIndex = link.indexOf("&");
                    if (endIndex > 0 && endIndex < link.length()) {
                        try {
                            attachmentID = Integer.parseInt(link.substring(ATTACHMENT_CGI_ID.length(), endIndex));
                        } catch (NumberFormatException e) {
                            return;
                        }
                    }
                }
                String title = htmlTag.getAttribute(ATTRIBUTE_ID_TITLE);
                if (title != null) {
                    attachmentDescription = title;
                }
                if (attachmentID > 0) {
                    comment.setHasAttachment(true);
                    comment.setAttachmentId(attachmentID);
                    comment.setAttachmentDescription(attachmentDescription);
                    comment.setObsolete(obsolete);
                }
            }

            if (htmlTag.getTagName().equals(TAG_SPAN) && htmlTag.isEndTag()) {
                break;
            }
        }
        current = tokenizer.nextToken();
    }
}
/**
* Parse the full html version of the bug.
* <p>
* The page is read token by token. A small state machine (see ParserState)
* alternates between attribute name cells and attribute value cells until
* the description is found, and from then on between comment headers and
* comment texts. The page title, and for error pages the page text, is
* inspected for signs of a failed login.
*
* @param in -
* the input stream for the bug
* @param id -
* the id of the bug that is to be parsed
* @param serverUrl -
* the url of the server, stored on the report and passed on to the
* attribute value parsing
* @param is218 -
* whether the page is to be parsed with the rules for version 2.18
* @param userName -
* passed on to the attribute value parsing
* @param password -
* passed on to the attribute value parsing
* @param contentType -
* content type string the charset is taken from; may be null, in
* which case META tags of the page are searched instead
* @return A bug report for the bug that was parsed, or null if neither a
* creation date nor any attribute was found and the page did not
* look like a login failure
* @throws IOException
* @throws ParseException
* @throws LoginException
* if nothing was parsed and the page looked like a login failure
*/
public static BugReport parseBug(Reader in, int id, String serverUrl, boolean is218, String userName,
String password, String contentType) throws IOException, ParseException, LoginException {
// create a new bug report and set the parser state to the start state
BugReport bug = new BugReport(id, serverUrl);
// prefer the charset named by the content type, if it names a usable one
boolean contentTypeResolved = false;
if (contentType != null) {
String charsetFromContentType = getCharsetFromString(contentType);
if (charsetFromContentType != null) {
bug.setCharset(charsetFromContentType);
contentTypeResolved = true;
}
}
ParserState state = ParserState.START;
Comment comment = null;
String attribute = null;
HtmlStreamTokenizer tokenizer = new HtmlStreamTokenizer(in, null);
// true while the tokens between <title> and </title> are being read
boolean isTitle = false;
boolean possibleBadLogin = false;
// true once the title started with "error"; the page text is then collected
boolean checkBody = false;
String title = "";
StringBuffer body = new StringBuffer();
for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) {
// get the charset from the HTML if not specified
// NOTE(review): contentTypeResolved is not set here, so every META tag
// is examined and a later one naming a charset replaces an earlier one
if (!contentTypeResolved) {
if (token.getType() == Token.TAG && ((HtmlTag) (token.getValue())).getTagType() == HtmlTag.Type.META
&& !((HtmlTag) (token.getValue())).isEndTag()) {
String charsetFromHtml = getCharsetFromString(token.toString());
if (charsetFromHtml != null)
bug.setCharset(charsetFromHtml);
}
}
// make sure that bugzilla doesn't want us to login
if (token.getType() == Token.TAG && ((HtmlTag) (token.getValue())).getTagType() == HtmlTag.Type.TITLE
&& !((HtmlTag) (token.getValue())).isEndTag()) {
isTitle = true;
continue;
}
if (isTitle) {
// get all of the data in the title tag (lower-cased for matching)
if (token.getType() != Token.TAG) {
title += ((StringBuffer) token.getValue()).toString().toLowerCase() + " ";
continue;
} else if (token.getType() == Token.TAG
&& ((HtmlTag) token.getValue()).getTagType() == HtmlTag.Type.TITLE
&& ((HtmlTag) token.getValue()).isEndTag()) {
// check and see if the title seems as though we have wrong
// login info
if (title.indexOf("login") != -1
|| (title.indexOf("invalid") != -1 && title.indexOf("password") != -1)
|| title.indexOf("check e-mail") != -1)
possibleBadLogin = true; // we possibly have a bad login
// if the title starts with error, we may have a login problem,
// or there is a problem with the bug (doesn't exist), so we
// must do some more checks
if (title.startsWith("error"))
checkBody = true;
isTitle = false;
title = "";
}
// any other tag inside the title is skipped
continue;
}
// if we have to add all of the text so that we can check it later
// for problems with the username and password
// NOTE(review): unlike the title, this text is not lower-cased before
// it is matched after the loop
if (checkBody && token.getType() == Token.TEXT) {
body.append((StringBuffer) token.getValue());
body.append(" ");
}
// we have found the start of an attribute name
if ((state == ParserState.ATT_NAME || state == ParserState.START) && token.getType() == Token.TAG) {
HtmlTag tag = (HtmlTag) token.getValue();
if (tag.getTagType() == HtmlTag.Type.TD && "right".equalsIgnoreCase(tag.getAttribute("align"))) {
// parse the attribute's name
attribute = parseAttributeName(tokenizer);
// skip the cell that is marked as to be ignored
if (attribute != null && attribute.contains(IBugzillaConstants.INVALID_2201_ATTRIBUTE_IGNORED)) {
continue;
}
if (attribute.toLowerCase().startsWith("opened")) {
// find the colon so we can get the date
int index = attribute.toLowerCase().indexOf(":");
String date;
if (index != -1)
date = attribute.substring(index + 1).trim();
else
// no colon: skip the 6 characters of "opened"
date = attribute.substring(6).trim();
// set the bugs opened date to be the date we parsed
bug.setCreated(df.parse(date));
state = ParserState.ATT_NAME;
continue;
}
// in 2.18, the last modified looks like the opened so we
// need to parse it differently
if (attribute.toLowerCase().startsWith("last modified") && is218) {
// find the colon so we can get the date
int index = attribute.toLowerCase().indexOf(":");
String date;
if (index != -1)
date = attribute.substring(index + 1).trim();
else
// NOTE(review): the offset 6 matches "opened", not
// "last modified" - looks copied from the branch above
date = attribute.substring(6).trim();
// create a new attribute and set the date
AbstractRepositoryReportAttribute t = new AbstractRepositoryReportAttribute("Last Modified");
t.setValue(date);
// add the attribute to the bug report
bug.addAttribute(t);
bug.setLastModified(df.parse(date));
state = ParserState.ATT_NAME;
continue;
}
// an ordinary attribute: its value cell comes next
state = ParserState.ATT_VALUE;
continue;
} else if (tag.getTagType() == HtmlTag.Type.INPUT && "radio".equalsIgnoreCase(tag.getAttribute("type"))
&& "knob".equalsIgnoreCase(tag.getAttribute(KEY_NAME))) {
// we found a radio button
parseOperations(bug, tokenizer, tag, is218);
}
}
// we have found the start of attribute values
if (state == ParserState.ATT_VALUE && token.getType() == Token.TAG) {
HtmlTag tag = (HtmlTag) token.getValue();
if (tag.getTagType() == HtmlTag.Type.TD) {
// parse the attribute values
parseAttributeValue(bug, attribute, tokenizer, serverUrl, userName, password);
state = ParserState.ATT_NAME;
attribute = null;
continue;
}
}
// we have found the start of a comment
if (state == ParserState.DESC_START && token.getType() == Token.TAG) {
HtmlTag tag = (HtmlTag) token.getValue();
if (tag.getTagType() == HtmlTag.Type.I) {
// parse the comment's start
comment = parseCommentHead(bug, tokenizer);
state = ParserState.DESC_VALUE;
continue;
}
}
// we have found the start of the comment text
if (state == ParserState.DESC_VALUE && token.getType() == Token.TAG) {
HtmlTag tag = (HtmlTag) token.getValue();
if (tag.getTagType() == HtmlTag.Type.PRE) {
// parse the text of the comment
parseCommentText(bug, comment, tokenizer);
comment = null;
state = ParserState.DESC_START;
continue;
}
}
// last modification date, taken from the H3 text of the div with id
// "header"; only looked for while no creation date is known
if (bug.getCreated() == null && (state == ParserState.ATT_NAME || state == ParserState.START) && token.getType() == Token.TAG) {
HtmlTag tag = (HtmlTag) token.getValue();
if (tag.getTagType() == HtmlTag.Type.DIV && tag.getAttribute("id") != null && "header".equalsIgnoreCase(tag.getAttribute("id"))) {
StringBuffer sb = new StringBuffer();
parseLastModified(sb, tokenizer);
if(sb.length() > 0) {
int index = sb.indexOf(":");
String date;
if (index != -1)
date = sb.substring(index + 1).trim();
else
// NOTE(review): fixed offset 6; this throws if the text
// is shorter than 6 characters
date = sb.substring(6).trim();
// create a new attribute and set the date
AbstractRepositoryReportAttribute t = new AbstractRepositoryReportAttribute("Last Modified");
t.setValue(date);
// add the attribute to the bug report
bug.setLastModified(df.parse(date));
bug.addAttribute(t);
}
continue;
}
}
// look for date opened field: a left-aligned cell of width 30%
if (bug.getCreated() == null && (state == ParserState.ATT_NAME || state == ParserState.START) && token.getType() == Token.TAG) {
HtmlTag tag = (HtmlTag) token.getValue();
if (tag.getTagType() == HtmlTag.Type.TD && tag.getAttribute("align") != null && "left".equalsIgnoreCase(tag.getAttribute("align")) && tag.getAttribute("width") != null && "30%".equals(tag.getAttribute("width"))) {
StringBuffer sb = new StringBuffer();
parseDateOpened(sb, tokenizer);
if(sb.length() > 0) {
int index = sb.indexOf(":");
String date;
if (index != -1)
date = sb.substring(index + 1).trim();
else
// NOTE(review): fixed offset 6; this throws if the text
// is shorter than 6 characters
date = sb.substring(6).trim();
// set the bugs opened date to be the date we parsed
bug.setCreated(df.parse(date));
}
continue;
}
}
// we have found the description of the bug
if ((state == ParserState.ATT_NAME || state == ParserState.START) && token.getType() == Token.TAG) {
HtmlTag tag = (HtmlTag) token.getValue();
if (tag.getTagType() == HtmlTag.Type.PRE) {
// parse the description for the bug
parseDescription(bug, tokenizer);
state = ParserState.DESC_START;
continue;
}
}
// parse hidden fields: stored as hidden attributes named after the input
if ((state == ParserState.ATT_NAME || state == ParserState.START) && token.getType() == Token.TAG) {
HtmlTag tag = (HtmlTag) token.getValue();
if (tag.getTagType() == HtmlTag.Type.INPUT && tag.getAttribute("type") != null
&& "hidden".equalsIgnoreCase(tag.getAttribute("type").trim())) {
AbstractRepositoryReportAttribute a = new AbstractRepositoryReportAttribute(tag.getAttribute(KEY_NAME));
a.setID(tag.getAttribute(KEY_NAME));
a.setValue(tag.getAttribute(KEY_VALUE));
a.setHidden(true);
bug.addAttribute(a);
continue;
}
}
// // parse out attachments
// if(token.getType() == Token.TAG) {
// HtmlTag tag = (HtmlTag) token.getValue();
// if(tag.getTagType() == HtmlTag.Type.A && tag.getAttribute("href")
// != null) {
// String link = tag.getAttribute("href");
// if(link.startsWith("attachment.cgi?id=") &&
// !link.contains("action")) {
// int attachmentID = Integer.parseInt(link.substring(18));
// String description = parseAttachementDescription(tokenizer);
// bug.addAttachment(attachmentID, description);
// }
// }
// }
}
// if we are to check the body, make sure that there wasn't a bad login
if (checkBody) {
String b = body.toString();
if (b.indexOf("login") != -1
|| ((b.indexOf("invalid") != -1 || b.indexOf("not valid") != -1) && b.indexOf("password") != -1)
|| b.indexOf("check e-mail") != -1)
possibleBadLogin = true; // we possibly have a bad login
}
// if there is no created date and no attribute, we expect that
// the bug doesn't exist, so return null
// if the bug seems like it doesn't exist, and we suspect a login
// problem, assume that there was a login problem
if (bug.getCreated() == null && bug.getAttributes().isEmpty()) {
if (possibleBadLogin) {
throw new LoginException(IBugzillaConstants.MESSAGE_LOGIN_FAILURE);
} else {
return null;
}
}
// we are done...return the bug
return bug;
}
/**
 * Appends the text of the current cell to {@code sb}, separating text tokens
 * with single blanks, and stops after the closing TD tag or at end of input.
 *
 * @param sb
 *            buffer that receives the cell text
 * @param tokenizer
 *            the tokenizer positioned just after the cell's opening tag
 * @throws IOException
 * @throws ParseException
 */
private static void parseDateOpened(StringBuffer sb, HtmlStreamTokenizer tokenizer) throws IOException, ParseException {
    Token current = tokenizer.nextToken();
    while (current.getType() != Token.EOF) {
        if (current.getType() == Token.TAG) {
            HtmlTag htmlTag = (HtmlTag) current.getValue();
            if (htmlTag.getTagType() == HtmlTag.Type.TD && htmlTag.isEndTag()) {
                return;
            }
        } else if (current.getType() == Token.TEXT) {
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append((StringBuffer) current.getValue());
        }
        current = tokenizer.nextToken();
    }
}
/**
 * Appends to {@code sb} all text that follows an opening H3 tag, separating
 * text tokens with single blanks, and stops after the next closing DIV tag
 * or at end of input. Text seen before the first opening H3 is ignored.
 *
 * @param sb
 *            buffer that receives the text
 * @param tokenizer
 *            the tokenizer positioned just after the opening DIV tag
 * @throws IOException
 * @throws ParseException
 */
private static void parseLastModified(StringBuffer sb, HtmlStreamTokenizer tokenizer) throws IOException, ParseException {
    boolean headingSeen = false;
    Token current = tokenizer.nextToken();
    while (current.getType() != Token.EOF) {
        if (current.getType() == Token.TAG) {
            HtmlTag htmlTag = (HtmlTag) current.getValue();
            if (htmlTag.getTagType() == HtmlTag.Type.H3 && !htmlTag.isEndTag()) {
                headingSeen = true;
            } else if (htmlTag.getTagType() == HtmlTag.Type.DIV && htmlTag.isEndTag()) {
                return;
            }
        } else if (current.getType() == Token.TEXT && headingSeen) {
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append((StringBuffer) current.getValue());
        }
        current = tokenizer.nextToken();
    }
}
/**
 * Extracts the name of a supported charset from a string containing a
 * {@code charset=NAME} parameter, such as a Content-Type header value or the
 * text of an HTML META tag.
 * <p>
 * The parameter name is matched case-insensitively. Whitespace around the
 * equals sign and a single or double quote in front of the name are
 * skipped; the name ends at the first character that is not legal in a
 * charset name (so a following quote, semicolon, blank or {@code >} ends
 * it). Names are accepted if {@link Charset#isSupported(String)} accepts
 * them, which includes aliases.
 *
 * @param string
 *            the text to search; may be null
 * @return the charset name exactly as written in {@code string}, or null if
 *         the string is null or names no supported charset
 */
public static String getCharsetFromString(String string) {
    if (string == null) {
        return null;
    }
    final String key = "charset";
    final int length = string.length();
    for (int start = 0; start + key.length() <= length; start++) {
        if (!string.regionMatches(true, start, key, 0, key.length())) {
            continue;
        }
        int pos = start + key.length();
        while (pos < length && Character.isWhitespace(string.charAt(pos))) {
            pos++;
        }
        // only "charset = ..." counts; other occurrences of the word are skipped
        if (pos >= length || string.charAt(pos) != '=') {
            continue;
        }
        pos++;
        while (pos < length && Character.isWhitespace(string.charAt(pos))) {
            pos++;
        }
        if (pos < length && (string.charAt(pos) == '"' || string.charAt(pos) == '\'')) {
            pos++;
        }
        int end = pos;
        while (end < length && isCharsetNameChar(string.charAt(end))) {
            end++;
        }
        if (end == pos) {
            continue;
        }
        String candidate = string.substring(pos, end);
        try {
            if (Charset.isSupported(candidate)) {
                return candidate;
            }
        } catch (IllegalArgumentException e) {
            // not a legal charset name (e.g. it starts with punctuation);
            // keep looking for a later, well-formed parameter
        }
    }
    return null;
}

/**
 * Tells whether {@code c} may appear in a charset name: an ASCII letter or
 * digit, or one of {@code - + . : _}.
 */
private static boolean isCharsetNameChar(char c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
            || c == '-' || c == '+' || c == '.' || c == ':' || c == '_';
}
/**
 * Parse the operations that are allowed on the bug (Assign, Re-open, fix).
 * <p>
 * Starting from a "knob" radio button, the following text and A/B/STRONG
 * tags form the operation's display name. The first other tag ends the
 * name: a SELECT supplies the operation's options, an INPUT that is not
 * another knob supplies the operation's input field, and any tag other than
 * SELECT is pushed back for the caller.
 *
 * @param bug
 *            the bug to add the operation to
 * @param tokenizer
 *            the stream tokenizer for the bug
 * @param tag
 *            the radio button tag that starts the operation
 * @param is218
 *            when true, LABEL tags inside the name are skipped; otherwise a
 *            LABEL tag ends the name
 * @throws ParseException
 * @throws IOException
 */
private static void parseOperations(BugReport bug, HtmlStreamTokenizer tokenizer, HtmlTag tag, boolean is218)
        throws ParseException, IOException {
    String knobName = tag.getAttribute(KEY_VALUE);
    boolean isChecked = "checked".equals(tag.getAttribute("checked"));

    StringBuffer sb = new StringBuffer();
    Token lastTag = null;
    for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) {
        if (token.getType() == Token.TAG) {
            HtmlTag current = (HtmlTag) token.getValue();
            boolean inlineMarkup = current.getTagType() == HtmlTag.Type.A
                    || current.getTagType() == HtmlTag.Type.B || current.getTagType() == HtmlTag.Type.STRONG;
            boolean label = current.getTagType() == HtmlTag.Type.LABEL;
            if (!inlineMarkup && !label) {
                // the name is complete; remember what ended it
                lastTag = token;
                break;
            }
            if (label) {
                if (is218) {
                    continue;
                }
                // a LABEL ends the name without being remembered
                break;
            }
            sb.append(current.toString().trim() + " ");
        } else if (token.getType() == Token.TEXT) {
            // the former extra test trim().equals("\n") could never be true,
            // so every text token is appended, exactly as before
            sb.append(token.toString().trim() + " ");
        }
    }

    String displayName = HtmlStreamTokenizer.unescape(sb).toString();
    Operation o = new Operation(knobName, displayName);
    o.setChecked(isChecked);

    if (lastTag != null) {
        HtmlTag stopTag = (HtmlTag) lastTag.getValue();
        if (stopTag.getTagType() != HtmlTag.Type.SELECT) {
            tokenizer.pushback(lastTag);
            boolean isKnob = "radio".equalsIgnoreCase(stopTag.getAttribute("type"))
                    && "knob".equalsIgnoreCase(stopTag.getAttribute(KEY_NAME));
            if (stopTag.getTagType() == HtmlTag.Type.INPUT && !isKnob) {
                o.setInputName(stopTag.getAttribute(KEY_NAME));
                o.setInputValue(stopTag.getAttribute(KEY_VALUE));
            }
        } else {
            // parse the options
            // (the token after SELECT is no longer cast to HtmlTag up front:
            // the result was never used and a text token made the cast fail)
            Token token = tokenizer.nextToken();
            o.setUpOptions(stopTag.getAttribute(KEY_NAME));
            while (token.getType() != Token.EOF) {
                if (token.getType() != Token.TAG) {
                    token = tokenizer.nextToken();
                    continue;
                }
                HtmlTag current = (HtmlTag) token.getValue();
                if (current.getTagType() == HtmlTag.Type.SELECT && current.isEndTag()) {
                    break;
                }
                if (current.getTagType() == HtmlTag.Type.OPTION && !current.isEndTag()) {
                    String optionName = current.getAttribute(KEY_VALUE);
                    StringBuffer optionText = new StringBuffer();
                    // the first non-text token ends the option and is
                    // examined by the next iteration of the outer loop
                    for (token = tokenizer.nextToken(); token.getType() == Token.TEXT; token = tokenizer
                            .nextToken()) {
                        if (optionText.length() > 0) {
                            optionText.append(' ');
                        }
                        optionText.append((StringBuffer) token.getValue());
                    }
                    o.addOption(optionText.toString(), optionName);
                } else {
                    token = tokenizer.nextToken();
                }
            }
        }
    }
    bug.addOperation(o);
}
/**
* Enum class for describing current state of Bugzilla report parser.
*/
private static class ParserState {
/** An instance of the start state */
protected static final ParserState START = new ParserState("start");
/** An instance of the state when the parser found an attribute name */
protected static final ParserState ATT_NAME = new ParserState("att_name");
/** An instance of the state when the parser found an attribute value */
protected static final ParserState ATT_VALUE = new ParserState("att_value");
/** An instance of the state when the parser found a description */
protected static final ParserState DESC_START = new ParserState("desc_start");
/** An instance of the state when the parser found a description value */
protected static final ParserState DESC_VALUE = new ParserState("desc_value");
/** State's human-readable name */
private String name;
/**
* Constructor
*
* @param description -
* The states human readable name
*/
private ParserState(String description) {
this.name = description;
}
@Override
public String toString() {
return name;
}
}
}