blob: 6a209449a1dd1d48e89a920f63c4b60c013a4aa4 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2007 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Masatomo KOBAYASHI - initial API and implementation
*******************************************************************************/
package org.eclipse.actf.ai.navigator.userinfo.impl;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.actf.ai.fennec.treemanager.ITreeItem;
import org.eclipse.actf.model.ui.editor.browser.IWebBrowserACTF;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
public class AltTextGuesser {
private final IWebBrowserACTF browser;
private final ITreeItem item;
public AltTextGuesser(IWebBrowserACTF browser, ITreeItem item) {
this.browser = browser;
this.item = item;
}
public String getDefaultText() {
return item.getUIString();
}
public String guessByOCR() {
throw new RuntimeException("Not implemented.");
}
public String guessByContext() {
return getContext(browser, getContext(item));
}
private String getContext(ITreeItem item) {
Object o = item.getBaseNode();
if (!(o instanceof Node))
return null;
for (Node n = (Node) o; n != null; n = n.getParentNode()) {
// System.err.println(n.getClass());
if (n instanceof Document)
break;
String name = n.getLocalName();
// I'd like to use getLinkURI!!
if ("A".equals(name)) {
// System.out.println(n.getClass());
NamedNodeMap v = n.getAttributes();
if (v == null)
continue;
Node a = v.getNamedItem("href");
if (a == null)
continue;
return a.getNodeValue();
}
}
return null;
}
private String getContext(IWebBrowserACTF browser, String path) {
try {
URL url = new URL(new URL(browser.getURL()), path);
String s = null;
String title = "", h1 = "";
Pattern pattern = Pattern.compile("<(?:(title)|(h1))[^>]*>([^<]*)</(?:title|h1)>" , Pattern.CASE_INSENSITIVE);
BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()));
while ((s = br.readLine()) != null) { // TODO ad-hoc
Matcher matcher = pattern.matcher(s);
if (!matcher.matches())
continue;
if (matcher.group(1) != null)
title = matcher.group(3);
if (matcher.group(2) != null)
h1 = matcher.group(3);
if (title.length() > 0 && h1.length() > 0)
break;
}
br.close();
System.out.println("URL\t" + url);
System.out.println("title\t" + title);
System.out.println("h1\t" + h1);
return title.length() > h1.length() ? title : h1;
} catch (MalformedURLException e) {
// e.printStackTrace();
return null;
} catch (IOException e) {
// e.printStackTrace();
return null;
}
}
}