bundles/org.eclipse.wst.ws.parser/src/org/eclipse/wst/ws/internal/parser/wsil/HTMLHeadHandler.java - webservices/webtools.webservices - Git at Google

 /*******************************************************************************
  * Copyright (c) 2001, 2006 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  * IBM Corporation - initial API and implementation
  * yyyymmdd bug      Email and other contact information
  * -------- -------- -----------------------------------------------------------
  * 20060517   142324 rsinha@ca.ibm.com - Rupam Kuehner
  *******************************************************************************/

 package org.eclipse.wst.ws.internal.parser.wsil;

 import java.io.UnsupportedEncodingException;
 import java.util.Vector;

 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.SAXParseException;
 import org.xml.sax.helpers.DefaultHandler;

 /**
  * Extracts WSIL and DISCO references from the HEAD section of an HTML page.
  * <p>
  * The class offers two cooperating services:
  * <ul>
  * <li>{@link #harvestHeadTags(byte[])} reduces an HTML document to a small
  * <code>&lt;root&gt;</code> fragment that contains only the META and LINK
  * elements found in the HEAD, detecting the document charset on the way.</li>
  * <li>The SAX callbacks ({@link #startElement}) record the WSIL URIs declared by
  * <code>&lt;meta name="serviceInspection" content="..."&gt;</code> and the DISCO
  * URIs declared by
  * <code>&lt;link type="text/xml" rel="alternate" href="..."&gt;</code>.</li>
  * </ul>
  * Element and attribute-name matching is case-insensitive and does not depend
  * on the default locale.
  */
 public class HTMLHeadHandler extends DefaultHandler
 {
   private static final char START_TAG = '<';
   private static final char END_TAG = '>';
   private static final String HEAD_START_TAG = "<head>";
   private static final String HEAD_END_TAG = "</head>";
   private static final String ROOT_START_TAG = "<root>";
   private static final String ROOT_END_TAG = "</root>";
   private static final String UTF8 = "UTF-8";

   //HTML META tag information used to detect the charset.
   private static final String HTML_CONTENT = "content";
   private static final String HTTP_EQUIV = "http-equiv";
   private static final String HTTP_EQUIV_CONTENT_TYPE = "Content-Type";
   private static final String CHARSET = "charset";

   // WSIL tag information.
   private static final String META = "meta";
   private static final String NAME = "name";
   private static final String SERVICE_INSPECTION = "serviceInspection";
   private static final String CONTENT = "content";

   // DISCO tag information.
   private static final String LINK = "link";
   private static final String TYPE = "type";
   private static final String TEXT_XML = "text/xml";
   private static final String REL = "rel";
   private static final String ALTERNATE = "alternate";
   private static final String HREF = "href";

   // URI of the HTML page; relative references are resolved against it. May be null.
   private final String baseURI_;
   // Distinct WSIL URIs, in the order they were reported to startElement.
   private final Vector wsils_;
   // Distinct DISCO URIs, in the order they were reported to startElement.
   private final Vector discos_;
   private String byteEncoding = UTF8; //Default to UTF-8.

   /**
    * @param baseURI URI of the HTML page being processed. Relative WSIL and
    * DISCO references are resolved against everything up to and including its
    * last '/'. May be null, in which case references are recorded unchanged.
    */
   public HTMLHeadHandler(String baseURI)
   {
     super();
     baseURI_ = baseURI;
     wsils_ = new Vector();
     discos_ = new Vector();
   }

   /**
    * @return String[] the distinct WSIL URIs collected so far; never null.
    */
   public String[] getWsils()
   {
     String[] wsils = new String[wsils_.size()];
     wsils_.copyInto(wsils);
     return wsils;
   }

   /**
    * @return String[] the distinct DISCO URIs collected so far; never null.
    */
   public String[] getDiscos()
   {
     String[] discos = new String[discos_.size()];
     discos_.copyInto(discos);
     return discos;
   }

   /**
    * Records the WSIL or DISCO reference carried by a META or LINK element.
    * The element name is compared case-insensitively; attribute names and
    * values are looked up exactly as spelled in the constants above.
    * A META element without a content attribute, or a LINK element without
    * an href attribute, is ignored.
    */
   public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
   {
     // equalsIgnoreCase is locale-independent, unlike toLowerCase(), which
     // turns "LINK" into "l\u0131nk" under a Turkish default locale.
     if (META.equalsIgnoreCase(qName))
     {
       if (SERVICE_INSPECTION.equals(attributes.getValue(NAME)))
       {
         String wsilURI = attributes.getValue(CONTENT);
         if (wsilURI != null)
         {
           wsilURI = resolveAgainstBase(wsilURI);
           if (!wsils_.contains(wsilURI))
             wsils_.add(wsilURI);
         }
       }
     }
     else if (LINK.equalsIgnoreCase(qName))
     {
       // See http://msdn.microsoft.com/msdnmag/issues/02/02/xml/default.aspx for more details on DISCO.
       String type = attributes.getValue(TYPE);
       String rel = attributes.getValue(REL);
       String href = attributes.getValue(HREF);
       if (TEXT_XML.equals(type) && ALTERNATE.equals(rel) && href != null)
       {
         String discoURI = resolveAgainstBase(href);
         if (!discos_.contains(discoURI))
           discos_.add(discoURI);
       }
     }
   }

   /**
    * Turns a reference into an absolute URI when possible.
    * @param reference a non-null URI reference.
    * @return the reference unchanged when it already contains ":/" or when no
    * base URI is known; otherwise the base URI up to and including its last
    * '/' followed by the reference.
    */
   private String resolveAgainstBase(String reference)
   {
     if (baseURI_ == null || reference.indexOf(":/") != -1)
       return reference;
     return baseURI_.substring(0, baseURI_.lastIndexOf("/")+1) + reference;
   }

   /**
    * Recoverable parse errors are deliberately ignored.
    */
   public void error(SAXParseException e) throws SAXException
   {
   }

   /**
    * Fatal parse errors are deliberately ignored. Overriding with an empty body
    * suppresses the exception that DefaultHandler.fatalError throws by default.
    */
   public void fatalError(SAXParseException e) throws SAXException
   {
   }

   /**
    * Parser warnings are deliberately ignored.
    */
   public void warning(SAXParseException e) throws SAXException
   {
   }

   /**
    * Appends the elements of the provided tag in the provided document to the provided StringBuffer.
    * Each element is copied from its '&lt;' up to and including the next '&gt;'.
    * Harvesting stops quietly at an element that has no closing '&gt;'.
    * @param target buffer receiving the harvested elements.
    * @param document text to scan.
    * @param tag lower-case element name to harvest (META or LINK).
    * @param encoding charset name the document was decoded with.
    * @return boolean false if the value of the encoding parameter matched the detected charset
    * (ignoring case) or if no charset was detected.
    * Returns true if a charset was detected and it did not equal the encoding parameter. If true is returned
    * byteEncoding has been set to the detected charset and the harvesting of the tags has stopped at the
    * point the charset was detected. The caller should call this method again with the correct encoding.
    */
   private boolean harvestTags(StringBuffer target,String document,String tag, String encoding)
   {
     int documentLength = document.length();
     int tagLength = tag.length();
     int index = document.indexOf(START_TAG);
     // The strict '<' guarantees that the character following the name exists.
     while (index != -1 && (index+1+tagLength)<documentLength)
     {
       boolean nameMatches = document.regionMatches(true,index+1,tag,0,tagLength);
       // Require a terminator after the name so that e.g. <metadata> is not taken for <meta>.
       if (nameMatches && isNameTerminator(document.charAt(index+1+tagLength)))
       {
         int end = document.indexOf(END_TAG,index+1);
         if (end == -1)
           break; // Unterminated element: nothing more can be harvested safely.
         String element = document.substring(index,end+1);
         target.append(element);
         index = end+1;

         //If tag is META and declares the charset, find out what it is
         //and if it matches what was passed in. If it matches, continue
         //with the parsing and return false when complete.
         //If the detected charset is different from what was passed in,
         //- change byteEncoding to equal the detected charset.
         //- stop parsing.
         //- return true.
         if (tag.equals(META))
         {
           String detectedEncoding = detectCharset(element);
           if (detectedEncoding != null && !detectedEncoding.equalsIgnoreCase(encoding))
           {
             byteEncoding = detectedEncoding;
             return true;
           }
         }
       }
       else
         index++;
       index = document.indexOf(START_TAG,index);
     }
     return false;
   }

   /**
    * Reads the charset declared by a
    * <code>&lt;meta http-equiv="Content-Type" content="...; charset=X"&gt;</code> element.
    * @param metaElement the text of one META element, from '&lt;' to '&gt;'.
    * @return the trimmed charset name, or null when the element is not a
    * Content-Type declaration or declares no (or an empty) charset.
    */
   private static String detectCharset(String metaElement)
   {
     String httpEquiv = attributeValue(metaElement, HTTP_EQUIV);
     String content = attributeValue(metaElement, HTML_CONTENT);
     if (httpEquiv == null || content == null)
       return null;
     if (!httpEquiv.trim().equalsIgnoreCase(HTTP_EQUIV_CONTENT_TYPE))
       return null;

     int idxOfCharSet = indexOfIgnoreCase(content, CHARSET, 0);
     if (idxOfCharSet == -1)
       return null;
     int idxOfEquals = content.indexOf('=', idxOfCharSet+CHARSET.length());
     if (idxOfEquals == -1)
       return null;
     String charset = content.substring(idxOfEquals+1);
     // Drop any parameter that follows the charset.
     int idxOfSemicolon = charset.indexOf(';');
     if (idxOfSemicolon != -1)
       charset = charset.substring(0, idxOfSemicolon);
     charset = charset.trim();
     return charset.length() == 0 ? null : charset;
   }

   /**
    * Looks up an attribute in the text of a single element. Values may be
    * enclosed in double quotes, single quotes, or be unquoted.
    * @param element the text of one element, starting with '&lt;'.
    * @param name attribute name, compared case-insensitively.
    * @return the raw attribute value ("" for an attribute without a value), or
    * null when the attribute is absent or a quoted value is not terminated.
    */
   private static String attributeValue(String element, String name)
   {
     int length = element.length();
     int pos = 1; // Skip '<'.
     // Skip the element name.
     while (pos < length && !isNameTerminator(element.charAt(pos)))
       pos++;
     while (pos < length)
     {
       if (isNameTerminator(element.charAt(pos)))
       {
         pos++; // Separator between attributes.
         continue;
       }
       int nameStart = pos;
       while (pos < length && !isNameTerminator(element.charAt(pos)))
         pos++;
       String attributeName = element.substring(nameStart, pos);
       while (pos < length && Character.isWhitespace(element.charAt(pos)))
         pos++;

       String value = "";
       if (pos < length && element.charAt(pos) == '=')
       {
         pos++;
         while (pos < length && Character.isWhitespace(element.charAt(pos)))
           pos++;
         if (pos < length)
         {
           char quote = element.charAt(pos);
           if (quote == '"' || quote == '\'')
           {
             int close = element.indexOf(quote, pos+1);
             if (close == -1)
               return null;
             value = element.substring(pos+1, close);
             pos = close+1;
           }
           else
           {
             int valueStart = pos;
             while (pos < length && !Character.isWhitespace(element.charAt(pos)) && element.charAt(pos) != END_TAG)
               pos++;
             value = element.substring(valueStart, pos);
           }
         }
       }
       if (attributeName.equalsIgnoreCase(name))
         return value;
     }
     return null;
   }

   /**
    * @return true if the character cannot be part of an element or attribute name
    * as far as this class is concerned: whitespace, '=', '/' or '&gt;'.
    */
   private static boolean isNameTerminator(char c)
   {
     return Character.isWhitespace(c) || c == '=' || c == '/' || c == END_TAG;
   }

   /**
    * Case-insensitive, locale-independent variant of String.indexOf.
    * @return the index of the first match at or after fromIndex, or -1.
    */
   private static int indexOfIgnoreCase(String text, String pattern, int fromIndex)
   {
     int patternLength = pattern.length();
     int last = text.length() - patternLength;
     for (int i = Math.max(fromIndex, 0); i <= last; i++)
     {
       if (text.regionMatches(true, i, pattern, 0, patternLength))
         return i;
     }
     return -1;
   }

   /**
    * Decodes the bytes with the current byteEncoding, falling back to the
    * platform default charset when that encoding is not supported.
    */
   private String decode(byte[] bytes)
   {
     try
     {
       return new String(bytes, byteEncoding);
     }
     catch (UnsupportedEncodingException uee)
     {
       return new String(bytes);
     }
   }

   /**
    * @return the text from the first opening HEAD tag through the first closing
    * HEAD tag that follows it, or null when either tag is missing.
    */
   private static String extractHead(String html)
   {
     int headStartIndex = indexOfIgnoreCase(html, HEAD_START_TAG, 0);
     if (headStartIndex == -1)
       return null;
     // Search after the opening tag so that a stray earlier </head> cannot yield an inverted range.
     int headEndIndex = indexOfIgnoreCase(html, HEAD_END_TAG, headStartIndex+HEAD_START_TAG.length());
     if (headEndIndex == -1)
       return null;
     return html.substring(headStartIndex, headEndIndex+HEAD_END_TAG.length());
   }

   /**
    * If the provided byte array represents the contents of an HTML
    * document, this method will return a byte array in which
    * <ul>
    * <li>the opening and closing HEAD tags are removed and replaced with
    * opening and closing &lt;root&gt; tags</li>
    * <li>only the META and LINK elements of the HEAD are included between
    * the opening and closing &lt;root&gt; tags: all META elements first,
    * followed by all LINK elements.</li>
    * </ul>
    * When no complete HEAD section is found the result is an empty
    * &lt;root&gt; element.
    * <p>
    * The bytes are first decoded as UTF-8. If a META element declares a
    * different charset through http-equiv="Content-Type", the byteEncoding
    * attribute of this class is changed to it and the document is decoded and
    * harvested again. Callers of this method should call getByteEncoding()
    * after calling this method if they need to know the charset
    * value used by this method to decode/encode the byte array. When the
    * charset is not supported the platform default charset is used instead.
    * @param b the raw HTML document; null is treated as an empty document.
    * @return byte[] the &lt;root&gt; fragment, encoded with the detected charset.
    */
   public byte[] harvestHeadTags(byte[] b)
   {
     byte[] bytes = (b == null) ? new byte[0] : b;

     //Assume the default byte encoding of UTF-8 for now.
     String head = extractHead(decode(bytes));
     StringBuffer sb = new StringBuffer(ROOT_START_TAG);
     if (head != null)
     {
       boolean encodingChanged = harvestTags(sb,head,META,byteEncoding);
       if (encodingChanged)
       {
         //The harvestTags method detected a different charset
         //than the one that was passed in. Start from the beginning
         //with the correct charset.
         sb = new StringBuffer(ROOT_START_TAG);
         String head2 = extractHead(decode(bytes));
         if (head2 != null)
         {
           harvestTags(sb,head2,META,byteEncoding);
           harvestTags(sb,head2,LINK,byteEncoding);
         }
       }
       else
       {
         harvestTags(sb,head,LINK,byteEncoding);
       }
     }
     sb.append(ROOT_END_TAG);
     try
     {
       return sb.toString().getBytes(byteEncoding);
     }
     catch (UnsupportedEncodingException uee)
     {
       return sb.toString().getBytes();
     }
   }

   /**
    * @return String the charset name last used by harvestHeadTags; "UTF-8"
    * until a different charset has been detected.
    */
   public String getByteEncoding()
   {
     return byteEncoding;
   }
 }
	/*******************************************************************************
	* Copyright (c) 2001, 2006 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	* yyyymmdd bug Email and other contact information
	* -------- -------- -----------------------------------------------------------
	* 20060517 142324 rsinha@ca.ibm.com - Rupam Kuehner
	*******************************************************************************/

	package org.eclipse.wst.ws.internal.parser.wsil;

	import java.io.UnsupportedEncodingException;
	import java.util.Vector;

	import org.xml.sax.Attributes;
	import org.xml.sax.SAXException;
	import org.xml.sax.SAXParseException;
	import org.xml.sax.helpers.DefaultHandler;

	/**
	 * Extracts WSIL and DISCO references from the HEAD section of an HTML page.
	 * <p>
	 * The class offers two cooperating services:
	 * <ul>
	 * <li>{@link #harvestHeadTags(byte[])} reduces an HTML document to a small
	 * <code>&lt;root&gt;</code> fragment that contains only the META and LINK
	 * elements found in the HEAD, detecting the document charset on the way.</li>
	 * <li>The SAX callbacks ({@link #startElement}) record the WSIL URIs declared by
	 * <code>&lt;meta name="serviceInspection" content="..."&gt;</code> and the DISCO
	 * URIs declared by
	 * <code>&lt;link type="text/xml" rel="alternate" href="..."&gt;</code>.</li>
	 * </ul>
	 * Element and attribute-name matching is case-insensitive and does not depend
	 * on the default locale.
	 */
	public class HTMLHeadHandler extends DefaultHandler
	{
	  private static final char START_TAG = '<';
	  private static final char END_TAG = '>';
	  private static final String HEAD_START_TAG = "<head>";
	  private static final String HEAD_END_TAG = "</head>";
	  private static final String ROOT_START_TAG = "<root>";
	  private static final String ROOT_END_TAG = "</root>";
	  private static final String UTF8 = "UTF-8";

	  //HTML META tag information used to detect the charset.
	  private static final String HTML_CONTENT = "content";
	  private static final String HTTP_EQUIV = "http-equiv";
	  private static final String HTTP_EQUIV_CONTENT_TYPE = "Content-Type";
	  private static final String CHARSET = "charset";

	  // WSIL tag information.
	  private static final String META = "meta";
	  private static final String NAME = "name";
	  private static final String SERVICE_INSPECTION = "serviceInspection";
	  private static final String CONTENT = "content";

	  // DISCO tag information.
	  private static final String LINK = "link";
	  private static final String TYPE = "type";
	  private static final String TEXT_XML = "text/xml";
	  private static final String REL = "rel";
	  private static final String ALTERNATE = "alternate";
	  private static final String HREF = "href";

	  // URI of the HTML page; relative references are resolved against it. May be null.
	  private final String baseURI_;
	  // Distinct WSIL URIs, in the order they were reported to startElement.
	  private final Vector wsils_;
	  // Distinct DISCO URIs, in the order they were reported to startElement.
	  private final Vector discos_;
	  private String byteEncoding = UTF8; //Default to UTF-8.

	  /**
	   * @param baseURI URI of the HTML page being processed. Relative WSIL and
	   * DISCO references are resolved against everything up to and including its
	   * last '/'. May be null, in which case references are recorded unchanged.
	   */
	  public HTMLHeadHandler(String baseURI)
	  {
	    super();
	    baseURI_ = baseURI;
	    wsils_ = new Vector();
	    discos_ = new Vector();
	  }

	  /**
	   * @return String[] the distinct WSIL URIs collected so far; never null.
	   */
	  public String[] getWsils()
	  {
	    String[] wsils = new String[wsils_.size()];
	    wsils_.copyInto(wsils);
	    return wsils;
	  }

	  /**
	   * @return String[] the distinct DISCO URIs collected so far; never null.
	   */
	  public String[] getDiscos()
	  {
	    String[] discos = new String[discos_.size()];
	    discos_.copyInto(discos);
	    return discos;
	  }

	  /**
	   * Records the WSIL or DISCO reference carried by a META or LINK element.
	   * The element name is compared case-insensitively; attribute names and
	   * values are looked up exactly as spelled in the constants above.
	   * A META element without a content attribute, or a LINK element without
	   * an href attribute, is ignored.
	   */
	  public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
	  {
	    // equalsIgnoreCase is locale-independent, unlike toLowerCase(), which
	    // turns "LINK" into "l\u0131nk" under a Turkish default locale.
	    if (META.equalsIgnoreCase(qName))
	    {
	      if (SERVICE_INSPECTION.equals(attributes.getValue(NAME)))
	      {
	        String wsilURI = attributes.getValue(CONTENT);
	        if (wsilURI != null)
	        {
	          wsilURI = resolveAgainstBase(wsilURI);
	          if (!wsils_.contains(wsilURI))
	            wsils_.add(wsilURI);
	        }
	      }
	    }
	    else if (LINK.equalsIgnoreCase(qName))
	    {
	      // See http://msdn.microsoft.com/msdnmag/issues/02/02/xml/default.aspx for more details on DISCO.
	      String type = attributes.getValue(TYPE);
	      String rel = attributes.getValue(REL);
	      String href = attributes.getValue(HREF);
	      if (TEXT_XML.equals(type) && ALTERNATE.equals(rel) && href != null)
	      {
	        String discoURI = resolveAgainstBase(href);
	        if (!discos_.contains(discoURI))
	          discos_.add(discoURI);
	      }
	    }
	  }

	  /**
	   * Turns a reference into an absolute URI when possible.
	   * @param reference a non-null URI reference.
	   * @return the reference unchanged when it already contains ":/" or when no
	   * base URI is known; otherwise the base URI up to and including its last
	   * '/' followed by the reference.
	   */
	  private String resolveAgainstBase(String reference)
	  {
	    if (baseURI_ == null || reference.indexOf(":/") != -1)
	      return reference;
	    return baseURI_.substring(0, baseURI_.lastIndexOf("/")+1) + reference;
	  }

	  /**
	   * Recoverable parse errors are deliberately ignored.
	   */
	  public void error(SAXParseException e) throws SAXException
	  {
	  }

	  /**
	   * Fatal parse errors are deliberately ignored. Overriding with an empty body
	   * suppresses the exception that DefaultHandler.fatalError throws by default.
	   */
	  public void fatalError(SAXParseException e) throws SAXException
	  {
	  }

	  /**
	   * Parser warnings are deliberately ignored.
	   */
	  public void warning(SAXParseException e) throws SAXException
	  {
	  }

	  /**
	   * Appends the elements of the provided tag in the provided document to the provided StringBuffer.
	   * Each element is copied from its '&lt;' up to and including the next '&gt;'.
	   * Harvesting stops quietly at an element that has no closing '&gt;'.
	   * @param target buffer receiving the harvested elements.
	   * @param document text to scan.
	   * @param tag lower-case element name to harvest (META or LINK).
	   * @param encoding charset name the document was decoded with.
	   * @return boolean false if the value of the encoding parameter matched the detected charset
	   * (ignoring case) or if no charset was detected.
	   * Returns true if a charset was detected and it did not equal the encoding parameter. If true is returned
	   * byteEncoding has been set to the detected charset and the harvesting of the tags has stopped at the
	   * point the charset was detected. The caller should call this method again with the correct encoding.
	   */
	  private boolean harvestTags(StringBuffer target,String document,String tag, String encoding)
	  {
	    int documentLength = document.length();
	    int tagLength = tag.length();
	    int index = document.indexOf(START_TAG);
	    // The strict '<' guarantees that the character following the name exists.
	    while (index != -1 && (index+1+tagLength)<documentLength)
	    {
	      boolean nameMatches = document.regionMatches(true,index+1,tag,0,tagLength);
	      // Require a terminator after the name so that e.g. <metadata> is not taken for <meta>.
	      if (nameMatches && isNameTerminator(document.charAt(index+1+tagLength)))
	      {
	        int end = document.indexOf(END_TAG,index+1);
	        if (end == -1)
	          break; // Unterminated element: nothing more can be harvested safely.
	        String element = document.substring(index,end+1);
	        target.append(element);
	        index = end+1;

	        //If tag is META and declares the charset, find out what it is
	        //and if it matches what was passed in. If it matches, continue
	        //with the parsing and return false when complete.
	        //If the detected charset is different from what was passed in,
	        //- change byteEncoding to equal the detected charset.
	        //- stop parsing.
	        //- return true.
	        if (tag.equals(META))
	        {
	          String detectedEncoding = detectCharset(element);
	          if (detectedEncoding != null && !detectedEncoding.equalsIgnoreCase(encoding))
	          {
	            byteEncoding = detectedEncoding;
	            return true;
	          }
	        }
	      }
	      else
	        index++;
	      index = document.indexOf(START_TAG,index);
	    }
	    return false;
	  }

	  /**
	   * Reads the charset declared by a
	   * <code>&lt;meta http-equiv="Content-Type" content="...; charset=X"&gt;</code> element.
	   * @param metaElement the text of one META element, from '&lt;' to '&gt;'.
	   * @return the trimmed charset name, or null when the element is not a
	   * Content-Type declaration or declares no (or an empty) charset.
	   */
	  private static String detectCharset(String metaElement)
	  {
	    String httpEquiv = attributeValue(metaElement, HTTP_EQUIV);
	    String content = attributeValue(metaElement, HTML_CONTENT);
	    if (httpEquiv == null || content == null)
	      return null;
	    if (!httpEquiv.trim().equalsIgnoreCase(HTTP_EQUIV_CONTENT_TYPE))
	      return null;

	    int idxOfCharSet = indexOfIgnoreCase(content, CHARSET, 0);
	    if (idxOfCharSet == -1)
	      return null;
	    int idxOfEquals = content.indexOf('=', idxOfCharSet+CHARSET.length());
	    if (idxOfEquals == -1)
	      return null;
	    String charset = content.substring(idxOfEquals+1);
	    // Drop any parameter that follows the charset.
	    int idxOfSemicolon = charset.indexOf(';');
	    if (idxOfSemicolon != -1)
	      charset = charset.substring(0, idxOfSemicolon);
	    charset = charset.trim();
	    return charset.length() == 0 ? null : charset;
	  }

	  /**
	   * Looks up an attribute in the text of a single element. Values may be
	   * enclosed in double quotes, single quotes, or be unquoted.
	   * @param element the text of one element, starting with '&lt;'.
	   * @param name attribute name, compared case-insensitively.
	   * @return the raw attribute value ("" for an attribute without a value), or
	   * null when the attribute is absent or a quoted value is not terminated.
	   */
	  private static String attributeValue(String element, String name)
	  {
	    int length = element.length();
	    int pos = 1; // Skip '<'.
	    // Skip the element name.
	    while (pos < length && !isNameTerminator(element.charAt(pos)))
	      pos++;
	    while (pos < length)
	    {
	      if (isNameTerminator(element.charAt(pos)))
	      {
	        pos++; // Separator between attributes.
	        continue;
	      }
	      int nameStart = pos;
	      while (pos < length && !isNameTerminator(element.charAt(pos)))
	        pos++;
	      String attributeName = element.substring(nameStart, pos);
	      while (pos < length && Character.isWhitespace(element.charAt(pos)))
	        pos++;

	      String value = "";
	      if (pos < length && element.charAt(pos) == '=')
	      {
	        pos++;
	        while (pos < length && Character.isWhitespace(element.charAt(pos)))
	          pos++;
	        if (pos < length)
	        {
	          char quote = element.charAt(pos);
	          if (quote == '"' || quote == '\'')
	          {
	            int close = element.indexOf(quote, pos+1);
	            if (close == -1)
	              return null;
	            value = element.substring(pos+1, close);
	            pos = close+1;
	          }
	          else
	          {
	            int valueStart = pos;
	            while (pos < length && !Character.isWhitespace(element.charAt(pos)) && element.charAt(pos) != END_TAG)
	              pos++;
	            value = element.substring(valueStart, pos);
	          }
	        }
	      }
	      if (attributeName.equalsIgnoreCase(name))
	        return value;
	    }
	    return null;
	  }

	  /**
	   * @return true if the character cannot be part of an element or attribute name
	   * as far as this class is concerned: whitespace, '=', '/' or '&gt;'.
	   */
	  private static boolean isNameTerminator(char c)
	  {
	    return Character.isWhitespace(c) || c == '=' || c == '/' || c == END_TAG;
	  }

	  /**
	   * Case-insensitive, locale-independent variant of String.indexOf.
	   * @return the index of the first match at or after fromIndex, or -1.
	   */
	  private static int indexOfIgnoreCase(String text, String pattern, int fromIndex)
	  {
	    int patternLength = pattern.length();
	    int last = text.length() - patternLength;
	    for (int i = Math.max(fromIndex, 0); i <= last; i++)
	    {
	      if (text.regionMatches(true, i, pattern, 0, patternLength))
	        return i;
	    }
	    return -1;
	  }

	  /**
	   * Decodes the bytes with the current byteEncoding, falling back to the
	   * platform default charset when that encoding is not supported.
	   */
	  private String decode(byte[] bytes)
	  {
	    try
	    {
	      return new String(bytes, byteEncoding);
	    }
	    catch (UnsupportedEncodingException uee)
	    {
	      return new String(bytes);
	    }
	  }

	  /**
	   * @return the text from the first opening HEAD tag through the first closing
	   * HEAD tag that follows it, or null when either tag is missing.
	   */
	  private static String extractHead(String html)
	  {
	    int headStartIndex = indexOfIgnoreCase(html, HEAD_START_TAG, 0);
	    if (headStartIndex == -1)
	      return null;
	    // Search after the opening tag so that a stray earlier </head> cannot yield an inverted range.
	    int headEndIndex = indexOfIgnoreCase(html, HEAD_END_TAG, headStartIndex+HEAD_START_TAG.length());
	    if (headEndIndex == -1)
	      return null;
	    return html.substring(headStartIndex, headEndIndex+HEAD_END_TAG.length());
	  }

	  /**
	   * If the provided byte array represents the contents of an HTML
	   * document, this method will return a byte array in which
	   * <ul>
	   * <li>the opening and closing HEAD tags are removed and replaced with
	   * opening and closing &lt;root&gt; tags</li>
	   * <li>only the META and LINK elements of the HEAD are included between
	   * the opening and closing &lt;root&gt; tags: all META elements first,
	   * followed by all LINK elements.</li>
	   * </ul>
	   * When no complete HEAD section is found the result is an empty
	   * &lt;root&gt; element.
	   * <p>
	   * The bytes are first decoded as UTF-8. If a META element declares a
	   * different charset through http-equiv="Content-Type", the byteEncoding
	   * attribute of this class is changed to it and the document is decoded and
	   * harvested again. Callers of this method should call getByteEncoding()
	   * after calling this method if they need to know the charset
	   * value used by this method to decode/encode the byte array. When the
	   * charset is not supported the platform default charset is used instead.
	   * @param b the raw HTML document; null is treated as an empty document.
	   * @return byte[] the &lt;root&gt; fragment, encoded with the detected charset.
	   */
	  public byte[] harvestHeadTags(byte[] b)
	  {
	    byte[] bytes = (b == null) ? new byte[0] : b;

	    //Assume the default byte encoding of UTF-8 for now.
	    String head = extractHead(decode(bytes));
	    StringBuffer sb = new StringBuffer(ROOT_START_TAG);
	    if (head != null)
	    {
	      boolean encodingChanged = harvestTags(sb,head,META,byteEncoding);
	      if (encodingChanged)
	      {
	        //The harvestTags method detected a different charset
	        //than the one that was passed in. Start from the beginning
	        //with the correct charset.
	        sb = new StringBuffer(ROOT_START_TAG);
	        String head2 = extractHead(decode(bytes));
	        if (head2 != null)
	        {
	          harvestTags(sb,head2,META,byteEncoding);
	          harvestTags(sb,head2,LINK,byteEncoding);
	        }
	      }
	      else
	      {
	        harvestTags(sb,head,LINK,byteEncoding);
	      }
	    }
	    sb.append(ROOT_END_TAG);
	    try
	    {
	      return sb.toString().getBytes(byteEncoding);
	    }
	    catch (UnsupportedEncodingException uee)
	    {
	      return sb.toString().getBytes();
	    }
	  }

	  /**
	   * @return String the charset name last used by harvestHeadTags; "UTF-8"
	   * until a different charset has been detected.
	   */
	  public String getByteEncoding()
	  {
	    return byteEncoding;
	  }
	}