core/org.eclipse.smila.importing.crawler.web.test/code/src/org/eclipse/smila/importing/crawler/web/test/TestDefaultLinkExtractor.java - smila/org.eclipse.smila.core - Git at Google

 /*******************************************************************************
  * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
  * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
  * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors: Andreas Weber (Attensity Europe GmbH) - initial API and implementation
  *******************************************************************************/
 package org.eclipse.smila.importing.crawler.web.test;

 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;

 import org.eclipse.smila.datamodel.DataFactory;
 import org.eclipse.smila.datamodel.Record;
 import org.eclipse.smila.importing.crawler.web.LinkExtractor;
 import org.eclipse.smila.importing.crawler.web.WebCrawlerConstants;
 import org.eclipse.smila.importing.crawler.web.extractor.DefaultLinkExtractor;
 import org.eclipse.smila.importing.crawler.web.extractor.LinkExtractorHtmlNeko;
 import org.eclipse.smila.importing.crawler.web.extractor.LinkExtractorHtmlSoup;

 /**
  * Test for {@link DefaultLinkExtractor} class.
  *
  * Each test builds a small HTML document, stores it as content attachment of an input record whose URL is
  * {@code BASE_URI_WITH_PATH_AND_FILE}, runs the extractor and compares the URLs of the returned records with the
  * expected links.
  */
 public class TestDefaultLinkExtractor extends WebExtractorTestBase {

   private static final String BASE_URI = "http://www.attensity.com";

   private static final String BASE_URI_WITH_PATH = BASE_URI + "/p";

   private static final String BASE_URI_WITH_PATH_AND_FILE = BASE_URI_WITH_PATH + "/test.html";

   /** explicit charset for encoding the test HTML, so the attachment bytes do not depend on the platform default. */
   private static final java.nio.charset.Charset HTML_CHARSET = java.nio.charset.Charset.forName("UTF-8");

   /** extractor under test; the tests select the HTML parser to use via {@code setLinkExtractorHtml}. */
   private DefaultLinkExtractor _extractor;

   /** looks up the {@link LinkExtractor} service and keeps it as {@link DefaultLinkExtractor}. */
   @Override
   protected void setUp() throws Exception {
     super.setUp();
     _extractor = (DefaultLinkExtractor) getService(LinkExtractor.class);
   }

   /** test for extracting absolute link. */
   public void testSimpleHref() throws Exception {
     final Map<String, String> testData = new HashMap<String, String>();
     final String link = BASE_URI_WITH_PATH + "/link.html";
     testData.put(link, link);
     final String htmlString = "<html> <a href=\"" + link + "\"/> </html>";
     final Record inputRecord = createInputRecord(htmlString);
     doTest(inputRecord, testData);
   }

   /** test for extracting absolute link with incomplete html (the anchor tag is never closed). */
   public void testSimpleHrefIncompleteHtml() throws Exception {
     final Map<String, String> testData = new HashMap<String, String>();
     final String link = BASE_URI_WITH_PATH + "/link.htm";
     testData.put(link, link);
     final String htmlString = "<html> <a href=\"" + link + "\"";
     final Record inputRecord = createInputRecord(htmlString);
     doTest(inputRecord, testData);
   }

   /** test for extracting relative links. */
   public void testRelativeLinks() throws Exception {
     final Map<String, String> testData = new HashMap<String, String>();
     // key: test link, value: expected absolute link
     testData.put("/link1.html", BASE_URI + "/link1.html");
     testData.put("link2.html", BASE_URI_WITH_PATH + "/link2.html");
     testData.put("../link3.htm", BASE_URI + "/link3.htm");
     testData.put("./link4.htm", BASE_URI_WITH_PATH + "/link4.htm");
     final String htmlString = createHtmlString(testData);
     final Record inputRecord = createInputRecord(htmlString);
     doTest(inputRecord, testData);
   }

   /**
    * tests that links can be extracted even if HTML is malformed. This succeeds for tagsoup but fails for nekohtml,
    * hence only the tagsoup parser is tested here.
    */
   public void testMalformedHtml() throws Exception {
     final String link1 = BASE_URI_WITH_PATH + "/link1.html";
     final String link2 = BASE_URI_WITH_PATH + "/link2.html";
     final Map<String, String> testData = new HashMap<String, String>();
     testData.put(link1, link1);
     testData.put(link2, link2);
     final String htmlString = "<p> <title> </p> " //
       + "<a href=\"" + link1 + "\"> </A>" //
       + "<p> </title> </p> </p>" //
       + "<A HrEF=\"" + link2 + "\"> </a>"; //
     final Record inputRecord = createInputRecord(htmlString);
     // this only works for tagsoup!
     doTestWithTagSoup(inputRecord, testData);
   }

   /** test for extracting links from 'FRAME' element. */
   public void testFrameLink() throws Exception {
     final Map<String, String> testData = new HashMap<String, String>();
     final String link1 = BASE_URI_WITH_PATH + "/navigation_Left.htm";
     final String link2 = BASE_URI_WITH_PATH + "/introduction.htm";
     testData.put(link1, link1);
     testData.put(link2, link2);
     // the frames use relative 'src' values, the expected links above are their absolute forms.
     final String htmlString = "<frameset> " //
       + "<frame name=\"Navigation_Frame\" src=\"navigation_Left.htm\" marginheight=\"0\" marginwidth=\"0\"/>" //
       + "<FRAME name=\"ContentFrame\" SRC=\"introduction.htm\" marginheight=\"0\" marginwidth=\"0\"/>" //
       + "</frameset>";
     final Record inputRecord = createInputRecord(htmlString);
     // this only works for tagsoup!
     doTestWithTagSoup(inputRecord, testData);
   }

   /** test for extracting links from 'IMG' element. */
   public void testImageLink() throws Exception {
     final Map<String, String> testData = new HashMap<String, String>();
     final String link1 = BASE_URI_WITH_PATH + "/icon.gif";
     final String link2 = BASE_URI_WITH_PATH + "/images/picture.jpg";
     testData.put(link1, link1);
     testData.put(link2, link2);
     final String htmlString = "<html><body><img src=\"" + link1 + "\"> <img src=\"" + link2 + "\"></body></html>";
     final Record inputRecord = createInputRecord(htmlString);
     // this only works for tagsoup!
     doTestWithTagSoup(inputRecord, testData);
   }

   /** test with unescaped link. Hint: We can't extract unescaped relative links at the moment */
   public void testUnescapedLink() throws Exception {
     final Map<String, String> testData = new HashMap<String, String>();
     final String link1 = BASE_URI_WITH_PATH + "/test1.html";
     final String linkUnescapedRel = "this link is not escaped"; // this can not be extracted!
     final String linkUnescapedAbs = "http://www.attensity.com/this link is not escaped/?query=a b";
     final String link2 = BASE_URI_WITH_PATH + "/test2.html";
     testData.put(link1, link1);
     testData.put(linkUnescapedAbs, "http://www.attensity.com/this%20link%20is%20not%20escaped/?query=a%20b");
     testData.put(link2, link2);
     // linkUnescapedRel is part of the HTML but deliberately not part of the expected links.
     final String htmlString = "<title>" //
       + "<a href=\"" + link1 + "\"> </a>" //
       + "<a href=\"" + linkUnescapedRel + "\"> </a>" //
       + "<a href=\"" + linkUnescapedAbs + "\"> </a>" //
       + "<a href=\"" + link2 + "\"> </a>"; //
     final Record inputRecord = createInputRecord(htmlString);
     doTest(inputRecord, testData);
   }

   /** test with escaped link. */
   public void testEscapedLink() throws Exception {
     final Map<String, String> testData = new HashMap<String, String>();
     final String link1 = BASE_URI_WITH_PATH + "/test1.html";
     final String linkEscapedAbs = "http://www.attensity.com/p/this%20link%20is%20escaped/?query=a%20b";
     final String linkEscapedRel = "this%20link%20is%20escaped/?query=a%20b";
     final String link2 = BASE_URI_WITH_PATH + "/test2.html";
     testData.put(link1, link1);
     testData.put(linkEscapedAbs, linkEscapedAbs);
     // the relative form is expected to yield the same absolute link a second time.
     testData.put(linkEscapedRel, linkEscapedAbs);
     testData.put(link2, link2);
     final String htmlString = "<title>" //
       + "<a href=\"" + link1 + "\"> </a>" //
       + "<a href=\"" + linkEscapedAbs + "\"> </a>" //
       + "<a href=\"" + linkEscapedRel + "\"> </a>" //
       + "<a href=\"" + link2 + "\"> </a>"; //
     final Record inputRecord = createInputRecord(htmlString);
     doTest(inputRecord, testData);
   }

   /** test link normalization. */
   public void testLinkNormalization() throws Exception {
     final Map<String, String> testData = new HashMap<String, String>();
     final String link1 = "HTtp://WWW.Attensity.com:8080/Test1.html"; // -> scheme and host are converted to lower case
     final String link2 = "http://www.attensity.com#fragment"; // -> fragment parts are removed
     final String link3 = "http://www.attensity.com/?Query=q&query2=q"; // -> query parts remain
     final String link4 = "http://www.attensity.com:80/port"; // -> default port 80 is removed
     final String link5 = "http://www.attensity.com/test unescaped"; // -> link will be escaped
     final String link6 = "http://www.attensity.com/test%20escaped"; // -> escaped link remains
     final String link7 = "http://www.attensity.com/path/../path2"; // -> path is normalized
     final String link8 = "javascript:void(0);"; // invalid
     final String link9 = "mailto:andreas.weber@empolis.com"; // invalid

     // link8 and link9 are only part of the HTML: no record is expected for them.
     testData.put(link1, "http://www.attensity.com:8080/Test1.html");
     testData.put(link2, "http://www.attensity.com");
     testData.put(link3, link3);
     testData.put(link4, "http://www.attensity.com/port");
     testData.put(link5, "http://www.attensity.com/test%20unescaped");
     testData.put(link6, link6);
     testData.put(link7, "http://www.attensity.com/path2");
     final String htmlString = "<title>" //
       + "<a href=\"" + link1 + "\"> </a>" //
       + "<a href=\"" + link2 + "\"> </a>" //
       + "<a href=\"" + link3 + "\"> </a>" //
       + "<a href=\"" + link4 + "\"> </a>" //
       + "<a href=\"" + link5 + "\"> </a>" //
       + "<a href=\"" + link6 + "\"> </a>" //
       + "<a href=\"" + link7 + "\"> </a>" //
       + "<a href=\"" + link8 + "\"> </a>" //
       + "<a href=\"" + link9 + "\"> </a>"; //
     final Record inputRecord = createInputRecord(htmlString);
     doTest(inputRecord, testData);
   }

   /**
    * do testing with nekohtml and tagsoup html parser.
    *
    * @param inputRecord
    *          record carrying the HTML content and the base URL
    * @param testData
    *          map whose values are the expected extracted links
    */
   private void doTest(final Record inputRecord, final Map<String, String> testData) throws Exception {
     // neko
     _extractor.setLinkExtractorHtml(new LinkExtractorHtmlNeko());
     extractAndCheck(inputRecord, testData);
     // tagsoup
     doTestWithTagSoup(inputRecord, testData);
   }

   /** do testing with the tagsoup html parser only, for input that the tests do not run against nekohtml. */
   private void doTestWithTagSoup(final Record inputRecord, final Map<String, String> testData) throws Exception {
     _extractor.setLinkExtractorHtml(new LinkExtractorHtmlSoup());
     extractAndCheck(inputRecord, testData);
   }

   /** runs the extractor with its currently set html parser and checks the result against the map values. */
   private void extractAndCheck(final Record inputRecord, final Map<String, String> testData) throws Exception {
     final Collection<Record> result = _extractor.extractLinks(inputRecord, _webCrawlingContext);
     checkResults(new ArrayList<String>(testData.values()), result);
   }

   /**
    * helper method to create html input from map with URIs.
    *
    * @param uriMap
    *          map whose keys are written as 'href' values; the values are not used here
    * @return an (unclosed) html document with one anchor per key, each followed by some junk markup
    */
   private String createHtmlString(final Map<String, String> uriMap) {
     final StringBuilder html = new StringBuilder("<html>");
     for (final String uri : uriMap.keySet()) {
       html.append("<a href=\"").append(uri).append("\"/>");
       // filler between the links (German, roughly: "some html junk standing in between")
       html.append("<irgendein> html <schrott> der dazwischen <steht>");
     }
     return html.toString();
   }

   /**
    * helper method to create input record from html.
    *
    * @param htmlContent
    *          html to store as content attachment, encoded with {@code HTML_CHARSET}
    * @return new record with the content attachment and {@code BASE_URI_WITH_PATH_AND_FILE} as URL attribute
    */
   private Record createInputRecord(final String htmlContent) {
     final Record r = DataFactory.DEFAULT.createRecord();
     // explicit charset: the no-arg getBytes() would use the platform default encoding.
     final byte[] htmlBytes = htmlContent.getBytes(HTML_CHARSET);
     r.setAttachment(WebCrawlerConstants.ATTACHMENT_CONTENT, htmlBytes);
     r.getMetadata().put(WebCrawlerConstants.ATTRIBUTE_URL, BASE_URI_WITH_PATH_AND_FILE);
     return r;
   }

   /**
    * helper method to check the extracted links. The comparison ignores order and null entries in the expected
    * links; the given list itself is not modified.
    *
    * @param expectedLinks
    *          expected URLs, may contain null entries which are skipped
    * @param actualRecords
    *          records returned by the extractor, compared by their URL attribute
    */
   private void checkResults(final List<String> expectedLinks, final Collection<Record> actualRecords) {
     // work on a copy so that the caller's list is neither filtered nor re-ordered.
     final List<String> expected = new ArrayList<String>(expectedLinks);
     expected.removeAll(Collections.singleton(null));
     assertEquals(expected.size(), actualRecords.size());
     final List<String> actual = new ArrayList<String>(actualRecords.size());
     for (final Record r : actualRecords) {
       actual.add(r.getMetadata().getStringValue(WebCrawlerConstants.ATTRIBUTE_URL));
     }
     Collections.sort(expected);
     Collections.sort(actual);
     assertEquals(expected, actual);
   }
 }
	/*******************************************************************************
	* Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
	* accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
	* distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors: Andreas Weber (Attensity Europe GmbH) - initial API and implementation
	*******************************************************************************/
	package org.eclipse.smila.importing.crawler.web.test;

	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.Collections;
	import java.util.HashMap;
	import java.util.List;
	import java.util.Map;

	import org.eclipse.smila.datamodel.DataFactory;
	import org.eclipse.smila.datamodel.Record;
	import org.eclipse.smila.importing.crawler.web.LinkExtractor;
	import org.eclipse.smila.importing.crawler.web.WebCrawlerConstants;
	import org.eclipse.smila.importing.crawler.web.extractor.DefaultLinkExtractor;
	import org.eclipse.smila.importing.crawler.web.extractor.LinkExtractorHtmlNeko;
	import org.eclipse.smila.importing.crawler.web.extractor.LinkExtractorHtmlSoup;

	/**
	 * Test for {@link DefaultLinkExtractor} class.
	 *
	 * Each test builds a small HTML document, stores it as content attachment of an input record whose URL is
	 * {@code BASE_URI_WITH_PATH_AND_FILE}, runs the extractor and compares the URLs of the returned records with the
	 * expected links.
	 */
	public class TestDefaultLinkExtractor extends WebExtractorTestBase {

	  private static final String BASE_URI = "http://www.attensity.com";

	  private static final String BASE_URI_WITH_PATH = BASE_URI + "/p";

	  private static final String BASE_URI_WITH_PATH_AND_FILE = BASE_URI_WITH_PATH + "/test.html";

	  /** explicit charset for encoding the test HTML, so the attachment bytes do not depend on the platform default. */
	  private static final java.nio.charset.Charset HTML_CHARSET = java.nio.charset.Charset.forName("UTF-8");

	  /** extractor under test; the tests select the HTML parser to use via {@code setLinkExtractorHtml}. */
	  private DefaultLinkExtractor _extractor;

	  /** looks up the {@link LinkExtractor} service and keeps it as {@link DefaultLinkExtractor}. */
	  @Override
	  protected void setUp() throws Exception {
	    super.setUp();
	    _extractor = (DefaultLinkExtractor) getService(LinkExtractor.class);
	  }

	  /** test for extracting absolute link. */
	  public void testSimpleHref() throws Exception {
	    final Map<String, String> testData = new HashMap<String, String>();
	    final String link = BASE_URI_WITH_PATH + "/link.html";
	    testData.put(link, link);
	    final String htmlString = "<html> <a href=\"" + link + "\"/> </html>";
	    final Record inputRecord = createInputRecord(htmlString);
	    doTest(inputRecord, testData);
	  }

	  /** test for extracting absolute link with incomplete html (the anchor tag is never closed). */
	  public void testSimpleHrefIncompleteHtml() throws Exception {
	    final Map<String, String> testData = new HashMap<String, String>();
	    final String link = BASE_URI_WITH_PATH + "/link.htm";
	    testData.put(link, link);
	    final String htmlString = "<html> <a href=\"" + link + "\"";
	    final Record inputRecord = createInputRecord(htmlString);
	    doTest(inputRecord, testData);
	  }

	  /** test for extracting relative links. */
	  public void testRelativeLinks() throws Exception {
	    final Map<String, String> testData = new HashMap<String, String>();
	    // key: test link, value: expected absolute link
	    testData.put("/link1.html", BASE_URI + "/link1.html");
	    testData.put("link2.html", BASE_URI_WITH_PATH + "/link2.html");
	    testData.put("../link3.htm", BASE_URI + "/link3.htm");
	    testData.put("./link4.htm", BASE_URI_WITH_PATH + "/link4.htm");
	    final String htmlString = createHtmlString(testData);
	    final Record inputRecord = createInputRecord(htmlString);
	    doTest(inputRecord, testData);
	  }

	  /**
	   * tests that links can be extracted even if HTML is malformed. This succeeds for tagsoup but fails for nekohtml,
	   * hence only the tagsoup parser is tested here.
	   */
	  public void testMalformedHtml() throws Exception {
	    final String link1 = BASE_URI_WITH_PATH + "/link1.html";
	    final String link2 = BASE_URI_WITH_PATH + "/link2.html";
	    final Map<String, String> testData = new HashMap<String, String>();
	    testData.put(link1, link1);
	    testData.put(link2, link2);
	    final String htmlString = "<p> <title> </p> " //
	      + "<a href=\"" + link1 + "\"> </A>" //
	      + "<p> </title> </p> </p>" //
	      + "<A HrEF=\"" + link2 + "\"> </a>"; //
	    final Record inputRecord = createInputRecord(htmlString);
	    // this only works for tagsoup!
	    doTestWithTagSoup(inputRecord, testData);
	  }

	  /** test for extracting links from 'FRAME' element. */
	  public void testFrameLink() throws Exception {
	    final Map<String, String> testData = new HashMap<String, String>();
	    final String link1 = BASE_URI_WITH_PATH + "/navigation_Left.htm";
	    final String link2 = BASE_URI_WITH_PATH + "/introduction.htm";
	    testData.put(link1, link1);
	    testData.put(link2, link2);
	    // the frames use relative 'src' values, the expected links above are their absolute forms.
	    final String htmlString = "<frameset> " //
	      + "<frame name=\"Navigation_Frame\" src=\"navigation_Left.htm\" marginheight=\"0\" marginwidth=\"0\"/>" //
	      + "<FRAME name=\"ContentFrame\" SRC=\"introduction.htm\" marginheight=\"0\" marginwidth=\"0\"/>" //
	      + "</frameset>";
	    final Record inputRecord = createInputRecord(htmlString);
	    // this only works for tagsoup!
	    doTestWithTagSoup(inputRecord, testData);
	  }

	  /** test for extracting links from 'IMG' element. */
	  public void testImageLink() throws Exception {
	    final Map<String, String> testData = new HashMap<String, String>();
	    final String link1 = BASE_URI_WITH_PATH + "/icon.gif";
	    final String link2 = BASE_URI_WITH_PATH + "/images/picture.jpg";
	    testData.put(link1, link1);
	    testData.put(link2, link2);
	    final String htmlString = "<html><body><img src=\"" + link1 + "\"> <img src=\"" + link2 + "\"></body></html>";
	    final Record inputRecord = createInputRecord(htmlString);
	    // this only works for tagsoup!
	    doTestWithTagSoup(inputRecord, testData);
	  }

	  /** test with unescaped link. Hint: We can't extract unescaped relative links at the moment */
	  public void testUnescapedLink() throws Exception {
	    final Map<String, String> testData = new HashMap<String, String>();
	    final String link1 = BASE_URI_WITH_PATH + "/test1.html";
	    final String linkUnescapedRel = "this link is not escaped"; // this can not be extracted!
	    final String linkUnescapedAbs = "http://www.attensity.com/this link is not escaped/?query=a b";
	    final String link2 = BASE_URI_WITH_PATH + "/test2.html";
	    testData.put(link1, link1);
	    testData.put(linkUnescapedAbs, "http://www.attensity.com/this%20link%20is%20not%20escaped/?query=a%20b");
	    testData.put(link2, link2);
	    // linkUnescapedRel is part of the HTML but deliberately not part of the expected links.
	    final String htmlString = "<title>" //
	      + "<a href=\"" + link1 + "\"> </a>" //
	      + "<a href=\"" + linkUnescapedRel + "\"> </a>" //
	      + "<a href=\"" + linkUnescapedAbs + "\"> </a>" //
	      + "<a href=\"" + link2 + "\"> </a>"; //
	    final Record inputRecord = createInputRecord(htmlString);
	    doTest(inputRecord, testData);
	  }

	  /** test with escaped link. */
	  public void testEscapedLink() throws Exception {
	    final Map<String, String> testData = new HashMap<String, String>();
	    final String link1 = BASE_URI_WITH_PATH + "/test1.html";
	    final String linkEscapedAbs = "http://www.attensity.com/p/this%20link%20is%20escaped/?query=a%20b";
	    final String linkEscapedRel = "this%20link%20is%20escaped/?query=a%20b";
	    final String link2 = BASE_URI_WITH_PATH + "/test2.html";
	    testData.put(link1, link1);
	    testData.put(linkEscapedAbs, linkEscapedAbs);
	    // the relative form is expected to yield the same absolute link a second time.
	    testData.put(linkEscapedRel, linkEscapedAbs);
	    testData.put(link2, link2);
	    final String htmlString = "<title>" //
	      + "<a href=\"" + link1 + "\"> </a>" //
	      + "<a href=\"" + linkEscapedAbs + "\"> </a>" //
	      + "<a href=\"" + linkEscapedRel + "\"> </a>" //
	      + "<a href=\"" + link2 + "\"> </a>"; //
	    final Record inputRecord = createInputRecord(htmlString);
	    doTest(inputRecord, testData);
	  }

	  /** test link normalization. */
	  public void testLinkNormalization() throws Exception {
	    final Map<String, String> testData = new HashMap<String, String>();
	    final String link1 = "HTtp://WWW.Attensity.com:8080/Test1.html"; // -> scheme and host are converted to lower case
	    final String link2 = "http://www.attensity.com#fragment"; // -> fragment parts are removed
	    final String link3 = "http://www.attensity.com/?Query=q&query2=q"; // -> query parts remain
	    final String link4 = "http://www.attensity.com:80/port"; // -> default port 80 is removed
	    final String link5 = "http://www.attensity.com/test unescaped"; // -> link will be escaped
	    final String link6 = "http://www.attensity.com/test%20escaped"; // -> escaped link remains
	    final String link7 = "http://www.attensity.com/path/../path2"; // -> path is normalized
	    final String link8 = "javascript:void(0);"; // invalid
	    final String link9 = "mailto:andreas.weber@empolis.com"; // invalid

	    // link8 and link9 are only part of the HTML: no record is expected for them.
	    testData.put(link1, "http://www.attensity.com:8080/Test1.html");
	    testData.put(link2, "http://www.attensity.com");
	    testData.put(link3, link3);
	    testData.put(link4, "http://www.attensity.com/port");
	    testData.put(link5, "http://www.attensity.com/test%20unescaped");
	    testData.put(link6, link6);
	    testData.put(link7, "http://www.attensity.com/path2");
	    final String htmlString = "<title>" //
	      + "<a href=\"" + link1 + "\"> </a>" //
	      + "<a href=\"" + link2 + "\"> </a>" //
	      + "<a href=\"" + link3 + "\"> </a>" //
	      + "<a href=\"" + link4 + "\"> </a>" //
	      + "<a href=\"" + link5 + "\"> </a>" //
	      + "<a href=\"" + link6 + "\"> </a>" //
	      + "<a href=\"" + link7 + "\"> </a>" //
	      + "<a href=\"" + link8 + "\"> </a>" //
	      + "<a href=\"" + link9 + "\"> </a>"; //
	    final Record inputRecord = createInputRecord(htmlString);
	    doTest(inputRecord, testData);
	  }

	  /**
	   * do testing with nekohtml and tagsoup html parser.
	   *
	   * @param inputRecord
	   *          record carrying the HTML content and the base URL
	   * @param testData
	   *          map whose values are the expected extracted links
	   */
	  private void doTest(final Record inputRecord, final Map<String, String> testData) throws Exception {
	    // neko
	    _extractor.setLinkExtractorHtml(new LinkExtractorHtmlNeko());
	    extractAndCheck(inputRecord, testData);
	    // tagsoup
	    doTestWithTagSoup(inputRecord, testData);
	  }

	  /** do testing with the tagsoup html parser only, for input that the tests do not run against nekohtml. */
	  private void doTestWithTagSoup(final Record inputRecord, final Map<String, String> testData) throws Exception {
	    _extractor.setLinkExtractorHtml(new LinkExtractorHtmlSoup());
	    extractAndCheck(inputRecord, testData);
	  }

	  /** runs the extractor with its currently set html parser and checks the result against the map values. */
	  private void extractAndCheck(final Record inputRecord, final Map<String, String> testData) throws Exception {
	    final Collection<Record> result = _extractor.extractLinks(inputRecord, _webCrawlingContext);
	    checkResults(new ArrayList<String>(testData.values()), result);
	  }

	  /**
	   * helper method to create html input from map with URIs.
	   *
	   * @param uriMap
	   *          map whose keys are written as 'href' values; the values are not used here
	   * @return an (unclosed) html document with one anchor per key, each followed by some junk markup
	   */
	  private String createHtmlString(final Map<String, String> uriMap) {
	    final StringBuilder html = new StringBuilder("<html>");
	    for (final String uri : uriMap.keySet()) {
	      html.append("<a href=\"").append(uri).append("\"/>");
	      // filler between the links (German, roughly: "some html junk standing in between")
	      html.append("<irgendein> html <schrott> der dazwischen <steht>");
	    }
	    return html.toString();
	  }

	  /**
	   * helper method to create input record from html.
	   *
	   * @param htmlContent
	   *          html to store as content attachment, encoded with {@code HTML_CHARSET}
	   * @return new record with the content attachment and {@code BASE_URI_WITH_PATH_AND_FILE} as URL attribute
	   */
	  private Record createInputRecord(final String htmlContent) {
	    final Record r = DataFactory.DEFAULT.createRecord();
	    // explicit charset: the no-arg getBytes() would use the platform default encoding.
	    final byte[] htmlBytes = htmlContent.getBytes(HTML_CHARSET);
	    r.setAttachment(WebCrawlerConstants.ATTACHMENT_CONTENT, htmlBytes);
	    r.getMetadata().put(WebCrawlerConstants.ATTRIBUTE_URL, BASE_URI_WITH_PATH_AND_FILE);
	    return r;
	  }

	  /**
	   * helper method to check the extracted links. The comparison ignores order and null entries in the expected
	   * links; the given list itself is not modified.
	   *
	   * @param expectedLinks
	   *          expected URLs, may contain null entries which are skipped
	   * @param actualRecords
	   *          records returned by the extractor, compared by their URL attribute
	   */
	  private void checkResults(final List<String> expectedLinks, final Collection<Record> actualRecords) {
	    // work on a copy so that the caller's list is neither filtered nor re-ordered.
	    final List<String> expected = new ArrayList<String>(expectedLinks);
	    expected.removeAll(Collections.singleton(null));
	    assertEquals(expected.size(), actualRecords.size());
	    final List<String> actual = new ArrayList<String>(actualRecords.size());
	    for (final Record r : actualRecords) {
	      actual.add(r.getMetadata().getStringValue(WebCrawlerConstants.ATTRIBUTE_URL));
	    }
	    Collections.sort(expected);
	    Collections.sort(actual);
	    assertEquals(expected, actual);
	  }
	}