blob: 6e5edb154b86f9d0834130e8d98909b468b4df8a [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2004, 2008 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Kentarou FUKUDA - initial API and implementation
*******************************************************************************/
package org.eclipse.actf.visualization.engines.blind.util;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;
import org.eclipse.actf.visualization.engines.blind.BlindVizEnginePlugin;
import org.eclipse.actf.visualization.engines.blind.ui.preferences.IBlindPreferenceConstants;
import org.eclipse.core.runtime.FileLocator;
import org.eclipse.core.runtime.Path;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.runtime.Preferences;
public class TextChecker {
public static String KIGOU = "(\\p{InMathematicalOperators}|\\p{InGeometricShapes}|\\p{InMiscellaneousSymbols}|\\p{InBoxDrawing}|\\p{InGeneralPunctuation}|\\p{InCJKSymbolsandPunctuation}|\\p{InArrows})";
public static String NIHONGO = "(\\p{InCJKUnifiedIdeographs}|\\p{InHiragana}|\\p{InKatakana})";
public static String KANJI = "(\\p{InCJKUnifiedIdeographs})";
private static final String ALT_TEXT_PROPERTIES_FILE = "altText.properties";
private static String INAPP_ALT = "blindViz.inappropriateAlt_";
private static String POSSIBLE_INAPP_ALT = "blindViz.possible_inappAlt_";
private static TextChecker INSTANCE;
private Set<String> ngwordset = new TreeSet<String>();
private Set<String> ngwordset2 = new TreeSet<String>();
private Preferences pref = BlindVizEnginePlugin.getDefault().getPluginPreferences();
// TODO spell out check
// separated from VisualizeEngine
private TextChecker() {
if (!pref.getBoolean(IBlindPreferenceConstants.NOT_FIRST_TIME)) {
Properties prop = new Properties();
try {
InputStream prefIS = FileLocator.openStream(Platform
.getBundle(BlindVizEnginePlugin.PLUGIN_ID), new Path(
"config/"+ALT_TEXT_PROPERTIES_FILE), false);
if (prefIS != null) {
prop.load(prefIS);
}
} catch (Exception e) {
e.printStackTrace();
}
for (Object key : prop.keySet()) {
String keyS = (String) key;
String value;
if (keyS.startsWith(INAPP_ALT)) {
value = prop.getProperty(keyS);
if (value.length() > 0) {
ngwordset.add(value);
}
} else if (keyS.startsWith(POSSIBLE_INAPP_ALT)) {
value = prop.getProperty(keyS);
if (value.length() > 0) {
ngwordset2.add(value);
}
}
}
resetPreferences();
} else {
for (int i = 0; pref.contains(INAPP_ALT + i); i++) {
String value = pref.getString(INAPP_ALT + i);
if (value.length() > 0) {
ngwordset.add(value);
}
}
for (int i = 0; pref.contains(POSSIBLE_INAPP_ALT + i); i++) {
String value = pref.getString(POSSIBLE_INAPP_ALT + i);
if (value.length() > 0) {
ngwordset2.add(value);
}
}
}
}
public static TextChecker getInstance() {
if (INSTANCE == null) {
INSTANCE = new TextChecker();
}
return INSTANCE;
}
public boolean isRedundantText(String prevText, String curText) {
if ((prevText != null) && (prevText.length() > 1) && (curText.length() > 1)) {
String prevText2 = prevText.replaceAll("\\[|\\]|\\.|\\!|\\>", "");
prevText2 = prevText2.trim();
String curText2 = curText.replaceAll("\\[|\\]|\\.|\\!|\\>", "");
curText2 = curText2.trim();
if (curText2.equalsIgnoreCase(prevText2)) {
return true;
}
}
return (false);
}
public boolean isInappropriateAlt(String alt) {
String tmpAlt = alt.trim();
tmpAlt = tmpAlt.toLowerCase();
if (ngwordset.contains(tmpAlt) || isEndWithImageExt(tmpAlt)) {
return true;
} else {
return false;
}
}
public int isInappropriateAlt2(String alt) {
String[] tmpSA = alt.toLowerCase().split("(" + KIGOU + "|\\p{Punct}|\\p{Space})");
int count = 0;
int all = 0;
int charLength = 0;
for (int i = 0; i < tmpSA.length; i++) {
if (tmpSA[i].length() > 0) {
all++;
}
charLength += tmpSA[i].length();
if (ngwordset2.contains(tmpSA[i])) {
// System.out.println("alt: "+tmpSA[i]);
count++;
}
}
int org = alt.length();
// TODO combination
if (org > 0 && alt.matches(".*(\\p{Alpha}\\p{Space}){4,}.*")) {//TODO 4 is appropriate?
return 3;
}
// TODO Japanese check
if (org > 0 && ((double) charLength / (double) org) < 0.5) {
// TODO divide error (use ratio)
//spaces
if (!alt.matches("\\p{Space}*")) {
return 1;
}
}
// System.out.println(count+" "+all+":"+(double)count/(double)all);
if ((double) count / (double) all > 0.6) {
return 2;
} else if ((double) count / (double) all > 0.3) {
return 1;
}
return 0;
}
private boolean isEndWithImageExt(String alt) {
String tmpS = alt.trim().toLowerCase();
String regexp3 = "\\p{Print}*\\.(jpg|jpeg|gif|png)";
return (tmpS.matches(regexp3));
}
public boolean isSeparatedJapaneseChars(String target) {
String tmpS = target.trim();
tmpS = tmpS.toLowerCase();
//\u3000 = double byte space
String regexp1 = KIGOU + "*(\\p{Space}|\u3000)?(" + NIHONGO + "((\\p{Space}|\u3000)+" + NIHONGO + ")+)"
+ "(\\p{Space}|\u3000)?" + KIGOU + "*";
String regexp2 = ".*" + KANJI + ".*";
//TODO rewrite regexp (combination of Japanese and English)
if ((tmpS.matches(regexp1) && tmpS.matches(regexp2))) {
return true;
} else {
return (false);
}
}
public Set<String> getInappropriateALTSet() {
Set<String> tmpSet = new HashSet<String>();
tmpSet.addAll(ngwordset);
return tmpSet;
}
private void resetPreferences() {
int i = 0;
for (String value : ngwordset) {
pref.setValue(INAPP_ALT + i, value);
i++;
}
for (int j = i; pref.contains(INAPP_ALT + j); j++) {
pref.setValue(INAPP_ALT + j, "");
}
i = 0;
for (String value : ngwordset2) {
pref.setValue(POSSIBLE_INAPP_ALT + i, value);
i++;
}
for (int j = i; pref.contains(POSSIBLE_INAPP_ALT + j); j++) {
pref.setValue(POSSIBLE_INAPP_ALT + j, "");
}
}
public void setNewAltSet(Set<String> inappAltSet) {
if (inappAltSet != null) {
ngwordset = inappAltSet;
}
resetPreferences();
}
}