blob: cec056ad6f72d20a22e15e9dfa13440ec1089e5f [file] [log] [blame]
package org.eclipse.stem.utility.generators;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
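/*******************************************************************************
 * Copyright (c) 2006 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/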
/**
 * STEM II Data Migration/Generation Utilities : Instances of this utility class
 * generate the files for all countries.
 *
 * To configure, see configuration file :
 * org.eclipse.stem.utility/configuration/AreaGenerator.config
 *
 * To run, add the path to the configuration file in the run profile (i.e.
 * C:\stemII\org.eclipse.stem.utility\configuration\AreaGenerator.config)
 *
 * SET VMARGS TO : -Xms1024m -Xmx1024m
 */
public class AreaGenerator {
private String iso3166_1 = null;
private String iso3166_2 = null;
private String fixFile = null;
private String targetFile = null;
private String areaData = null;
private String polygonAreaData = null;
private String lexDir = null;
private String outputDir = null;
private String workspace = null;
* Lists for ISO 3166-1 codes, target countries, and population data.
List targetList = null;
List codeList = null;
List codefixList = null;
List areaList = null;
List level1PolygonAreaList = null;
List level2PolygonAreaList = null;
List usaLevel2List = null;
* Hash set for storing the source URLs relevant to area data.
Set<String> urlSet = null;
* Hash map to keep track of which sources will be shown commented out and
* which will appear active.
HashMap<String, Boolean> urlStatus = null;
* String used to indicate default value for unknown areas.
String DEFAULT = "0"; //$NON-NLS-1$
/*
 * Constructor (legacy form, kept commented out below)
 *
 * @param iso3166_1 -
 *            data file with the ISO3166-1 ALPHA_2 and ALPHA_3 codes.
 * @param iso3166_2 -
 *            file with the ISO3166-2 data used for generating the IDs.
 * @param fixFile -
 *            file containing missing ISO3166-2 codes
 * @param areaData -
 *            directory containing area data for each country in data set.
 * @param targetFile -
 *            data file where we find a list of countries for generation of
 *            their corresponding files.
 * @param polygonAreaData -
 *            the path to area data in polygon units
 * @param lexDir
 *            folder containing lexicographically sorted files
 * @param outputDir -
 *            the output directory for the files generated.
 */
// public AreaGenerator(String iso3166_1, String iso3166_2, String fixFile,
// String areaData, String targetFile, String polygonAreaData,
// String lexDir, String outputDir) {
// // Set values of global variables.
// this.iso3166_1 = iso3166_1;
// this.iso3166_2 = iso3166_2;
// this.fixFile = fixFile;
// this.targetFile = targetFile;
// this.areaData = areaData;
// this.polygonAreaData = polygonAreaData;
// this.lexDir = lexDir;
// this.outputDir = outputDir;
// targetList = null;
// // Set the URL IDs that are relevant for area data.
// initAreaURLSet();
// // Set which URLs are active and which ones should be commented out.
// setURLStatus();
// }
/**
 * Creates a generator whose input and output locations are read from a
 * configuration file.
 * <p>
 * The keys WORKSPACE, ISO3166_1, ISO3166_2, ISO_FIX, TARGET_LIST,
 * POLYGON_AREAS, AREA_POP, LEX_DIR and PROPERTIES_OUTPUT are recognised.
 * Every location that is not configured explicitly is derived from
 * WORKSPACE, which is therefore mandatory.
 *
 * @param configFile
 *            path to the configuration file
 * @throws IllegalArgumentException
 *             if the configuration does not define WORKSPACE
 */
public AreaGenerator(String configFile) {
    // Load all configuration variables
    HashMap<String, String> vars = GenUtils.loadConfigFile(configFile);

    // Assign values to configuration variables. A key that is absent simply
    // leaves the field null so the workspace-relative default applies below.
    workspace = vars.get("WORKSPACE"); //$NON-NLS-1$
    iso3166_1 = vars.get("ISO3166_1"); //$NON-NLS-1$
    iso3166_2 = vars.get("ISO3166_2"); //$NON-NLS-1$
    fixFile = vars.get("ISO_FIX"); //$NON-NLS-1$
    targetFile = vars.get("TARGET_LIST"); //$NON-NLS-1$
    polygonAreaData = vars.get("POLYGON_AREAS"); //$NON-NLS-1$
    areaData = vars.get("AREA_POP"); //$NON-NLS-1$
    lexDir = vars.get("LEX_DIR"); //$NON-NLS-1$
    outputDir = vars.get("PROPERTIES_OUTPUT"); //$NON-NLS-1$

    // Fail with a clear message instead of a bare NullPointerException.
    if (workspace == null) {
        throw new IllegalArgumentException(
                "WORKSPACE is not defined in configuration file: " + configFile); //$NON-NLS-1$
    }

    // The workspace variable should end with a slash character
    if (workspace.endsWith("\\") == false) { //$NON-NLS-1$
        workspace += "\\"; //$NON-NLS-1$
    }

    String path = workspace + GenConstants.PARAMS_DIR;

    // Set remaining config vars using WORKSPACE (workspace relative)
    if (iso3166_1 == null) {
        iso3166_1 = path + GenConstants.ISO3166_1_FILE;
    }
    if (iso3166_2 == null) {
        iso3166_2 = path + GenConstants.ISO3166_2_FILE;
    }
    if (fixFile == null) {
        fixFile = path + GenConstants.ISOFIX_FILE;
    }
    if (targetFile == null) {
        targetFile = path + GenConstants.GROUPED_COUNTRIES;
    }
    if (polygonAreaData == null) {
        polygonAreaData = path + GenConstants.POLYGON_AREAS_DIR;
    }
    if (areaData == null) {
        areaData = path + GenConstants.AREA_POPULATION_DIR;
    }
    if (lexDir == null) {
        lexDir = path + GenConstants.LEX_DIR;
    }
    if (outputDir == null) {
        // Unlike the inputs, the output directory hangs off the workspace
        // root rather than the parameters directory.
        outputDir = workspace + GenConstants.PROPERTIES_OUTPUT_DIR;
    }

    targetList = null;

    // NOTE(review): the two calls below are restored from the legacy
    // constructor that is kept (commented out) in this file; confirm
    // against version control.

    // Set the URL IDs that are relevant for area data.
    initAreaURLSet();

    // Set which URLs are active and which ones should be commented out.
    setURLStatus();
}
* Sets the URL IDs relevant for area data. We are only interested in URLs
* with ID = DIVA. This can be different for different types of data. For
* example, for population data, we are interested on ID = CIA
protected void initAreaURLSet() {
urlSet = new HashSet<String>();
urlSet.add(new String("CIA")); //$NON-NLS-1$
urlSet.add(new String("USCENSUSAREA")); //$NON-NLS-1$
urlSet.add(new String("USCENSUSBOUNDS")); //$NON-NLS-1$
urlSet.add(new String("USCENSUSPUERTORICO")); //$NON-NLS-1$
urlSet.add(new String("CITYPOPULATION")); //$NON-NLS-1$
urlSet.add(new String("UNITEDNATIONS")); //$NON-NLS-1$
* Set which URL appears as uncommented or ACTIVE and which ones appear as
* commented out or SECONDARY. Only URL should be active. All others should
* appear commented.
protected void setURLStatus() {
urlStatus = new HashMap<String, Boolean>();
// This will be the active URL.
urlStatus.put("CIA", new Boolean(true)); //$NON-NLS-1$
// All other URLs appear as commented out, secondary.
urlStatus.put("USCENSUSAREA", new Boolean(false)); //$NON-NLS-1$
urlStatus.put("USCENSUSBOUNDS", new Boolean(false)); //$NON-NLS-1$
urlStatus.put("USCENSUSPUERTORICO", new Boolean(false)); //$NON-NLS-1$
urlStatus.put("CIA", new Boolean(false)); //$NON-NLS-1$
urlStatus.put("CITYPOPULATION", new Boolean(false)); //$NON-NLS-1$
urlStatus.put("UNITEDNATIONS", new Boolean(true)); //$NON-NLS-1$
* This method sets up and runs this generator.
public void run() {
// Read and keep in memory a list of the countries we want.
targetList = GenUtils.populateList(targetFile);
// Read and keep in memory a list of the ISO3166-1 ALPHA2, ALPHA3, and
// numeric codes.
codeList = GenUtils.populateList(iso3166_1);
// Read and keep in memory a list of the missing ISO3166-2 codes
codefixList = GenUtils.populateList(fixFile);
usaLevel2List = GenUtils.populateList(GenConstants.USA_LEVEL_2_AREA);
// Now, process each item in the target list.
/**
 * Process our global list of target countries. For each country in the
 * list, generate its file.
 */
protected void process() {
// Overview: iterates over targetList; each entry is opened with
// GenUtils.openReader and read line by line. For every line the data level
// is obtained from GenUtils.getDataLevel; when the level changes the
// per-level state is reset and a header from getHeader() is written. Each
// entry is then written as "descriptor = area" through GenUtils.addData.
//
// NOTE(review): this copy of the method is damaged -- braces are
// unbalanced, several statements stop at "GenUtils" or in the middle of an
// argument list, and some comments are not followed by the statement they
// announce. The code is reproduced verbatim and the most visible gaps are
// flagged inline; restore them from version control before compiling.
final int LEVEL_1 = 1;
final int LEVEL_2 = 2;
// A writer for the new file we are creating.
PrintWriter propertiesFile = null;
// Object holding ISO3166-1 related data.
ISOCode1 isoCode = null;
// Name of the new properties file.
String fileName = null;
// A list for ISO 3166-2 objects
List iso2List = new ArrayList();
// Level 0 area in square kilometers
String level0AreaSqKm = null;
// Aggregate polygon area for level 1
String aggregateLevel1PolygonArea = null;
// Value used as default to compute the
// approximate area of an unknown location
String defaultAreaSqKm = DEFAULT;
String defaultPolygonArea = DEFAULT;
// Filenames for lex1 and lex2 files for a given country.
String lexAdmin1File = null;
String lexAdmin2File = null;
// Lists holding the lex1 and lex2 values.
// NOTE: lex values are the lexicographically sorted values for
// administration levels.
List<String> lex1DataSet = null;
List<String> lex2DataSet = null;
// Lists for holding lex1 and lex2 objects for a given country.
List<Lex1Obj> lex1List = null;
List<Lex2Obj> lex2List = null;
int size = targetList.size();
// Iterate for every country and create its file.
for (int i = 0; i < size; i++) {
// Get the next country in the list.
String file = ((String) targetList.get(i)).trim();
// An index for unknwon or n.a. entries
int unkCounter = 1;
// Open the data source file for this country.
BufferedReader reader = GenUtils.openReader(file);
// NOTE(review): the receiver of println is missing on the next line
// (presumably System.out) -- confirm.
.println("\t\t<<<< AREA GENERATOR -- Processing country : " + file + " >>>>"); //$NON-NLS-1$ //$NON-NLS-2$
try {
if (reader != null) {
String buffer = null;
// Describes level
// of data we
// are now processing : LEVEL0, LEVEL1, or LEVEL2.
int level = -1;
// Record the previous level.
int oldLevel = level - 1;
// We take a chunk of the data [BUFFER_MIN,BUFFER_MAX] to
// make processing more
// efficient, we dont need all of it.
int BUFFER_MIN = 0;
int BUFFER_MAX = 120;
List<String> entryList = null;
while (GenUtils.EOF(buffer = reader.readLine()) != true) {
// Make sure that we mark all unknown data.
buffer = buffer.replace(",,", ",UNKNOWN,"); //$NON-NLS-1$ //$NON-NLS-2$
// Use only part of the buffer, not all of it for
// efficieny.
// NOTE(review): the call below is cut off after the first substring
// argument (presumably BUFFER_MAX and the closing parentheses follow).
String[] items = GenUtils
.extract(buffer.length() < BUFFER_MAX ? buffer
: buffer.substring(BUFFER_MIN,
// Get the ISO3166-1 info for this country
if (isoCode == null) {
isoCode = GenUtils.getISOCode(
items[ISOData.COUNTRY_NAME], codeList);
// Read and keep in memory a list of the area data
// for this country.
if (areaList == null) {
String name = areaData + isoCode.getName()
+ "_AREA.txt"; //$NON-NLS-1$
areaList = GenUtils.populateList(name);
// Get the level of the data we are processing.
level = GenUtils.getDataLevel(items);
// NOTE(review): iso2List is initialised to a non-null list above, so as
// written this test holds for every row with level != 0 -- confirm
// whether "== null" (or an emptiness test) was intended.
if (level != 0 && iso2List != null) {
// Load the ISO 3166-2 objects for this country
iso2List = GenUtils.loadISO2(iso3166_2);
// This variable indicates a transition in our data
// processing
boolean transition = (level != oldLevel) ? true : false;
// Variable used to hold the formatted output data.
String data = null;
// The current entity we are dealing with.
// Based on its admin level it can be a country, a
// county, etc;
String current = null;
// Descriptor for current entry.
String descriptor = null;
if (transition == true) {
// Keep track of previous level in order to detect
// transitions.
oldLevel = level;
// Reset our repeated history
// NOTE(review): entryList starts out null, so as written the list is
// only re-created when it already exists; statements may be missing
// here -- confirm.
if (entryList != null) {
entryList = new ArrayList<String>();
// Reset counter of UNKNOWN and N.A. entries.
unkCounter = 1;
// Close previous file if it is still open.
// NOTE(review): no close() call on the writer is visible in this
// branch; the reference is only set to null -- confirm.
if (propertiesFile != null) {
UtilLogger.logInformation("\t\t<<Generated : " //$NON-NLS-1$
+ fileName + ">>"); //$NON-NLS-1$
propertiesFile = null;
fileName = null;
// Read and keep in memory a list of the level 1
// area data
// in polygon units for this country.
if (level1PolygonAreaList == null && level == 1) {
// Load the polygon area data for level 1.
String name = polygonAreaData
+ isoCode.getAlpha3()
+ "\\" //$NON-NLS-1$
+ isoCode.getAlpha3()
+ "_1_POLYAREA.txt"; //$NON-NLS-1$
// Load level 1 polygon area list
// NOTE(review): the assignment below stops at "GenUtils"; the method
// call that loads the list is missing.
level1PolygonAreaList = GenUtils
// Find the level 0 area in square kilometers
// NOTE(review): the argument list of this getArea call is cut off.
level0AreaSqKm = getArea(
items[ISOData.COUNTRY_NAME], areaList,
// Find the aggregate polygon area for level 1
aggregateLevel1PolygonArea = getAggregateArea(level1PolygonAreaList);
// Read and keep in memory a list of the level 2
// area data
// in polygon units for this country.
if (level2PolygonAreaList == null && level == 2) {
// Load the polygon area data for level 2.
String name = polygonAreaData
+ isoCode.getAlpha3()
+ "\\" + isoCode.getAlpha3() //$NON-NLS-1$
+ "_2_POLYAREA.txt"; //$NON-NLS-1$
// NOTE(review): truncated assignment, same pattern as for level 1.
level2PolygonAreaList = GenUtils
// If the directory does not exist for this country,
// create it using ISO alpha3 code.
File countryDir = new File(outputDir
+ isoCode.getAlpha3());
if (countryDir != null && countryDir.exists() == false) {
// Directory does not exist. Create it.
// NOTE(review): the statement that creates the directory is not
// present after the comment above.
// Create and open the new file.
if (propertiesFile == null) {
// Create the file name for the new properties.file.
// NOTE(review): the file name ends with an empty string literal; the
// suffix looks like it was lost -- confirm.
fileName = countryDir + "\\" + isoCode.getAlpha3() //$NON-NLS-1$
+ "_" + level + "_" + ""; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
// Create tne new properties.file for this country.
propertiesFile = GenUtils.openWriter(fileName);
// If we have switched to a different data level (i,e,
// admin 0 to admin 1)
if (transition == true) {
// Just add the header.
data = getHeader(isoCode, level);
GenUtils.addData(propertiesFile, data);
// Forget the data we just wrote.
data = null;
// Pick the entity name for the row from the column matching its level.
// NOTE(review): no break/continue statements are visible between the
// cases below; as written each case would fall through -- confirm.
switch (level) {
case DataTypes.LEVEL_0:
if (transition == true) {
data = "# Country\n"; //$NON-NLS-1$
data += isoCode.getAlpha3()
+ " = " //$NON-NLS-1$
+ getArea(items[ISOData.COUNTRY_NAME],
level) + "\n"; //$NON-NLS-1$
// Write formatted output
GenUtils.addData(propertiesFile, data);
case DataTypes.LEVEL_1:
current = items[ISOData.ADMIN1_NAME].toUpperCase();
case DataTypes.LEVEL_2:
current = items[ISOData.ADMIN2_NAME].toUpperCase();
} // switch
// Handle UNKNOWN and N.A. entries differently.
// Unknown entries get a synthetic, numbered name: "<alpha2>-UNK-<n>" at
// level 1 and "<alpha3>.L2.<n>" otherwise.
if (current.equals("UNKNOWN") || current.equals("N.A.")) { //$NON-NLS-1$ //$NON-NLS-2$
if (level == 1) {
current = isoCode.getAlpha2() + "-UNK-" //$NON-NLS-1$
+ String.valueOf(unkCounter++);
} else {
// Level 2 data.
current = isoCode.getAlpha3() + ".L2." //$NON-NLS-1$
+ String.valueOf(unkCounter++);
// Verify for repeated entries -- avoid duplcates
// NOTE(review): the statements that skip a duplicate and that record
// a level 1 entry are not visible below.
switch (level) {
case LEVEL_1:
if (GenUtils.isRepeated(current, entryList) == true) {
// Duplicated, ignore.
// Remember this entity to avoid duplicates.
case LEVEL_2:
if (GenUtils.isRepeated(items[ISOData.ADMIN1_NAME]
+ current, entryList) == true) {
// Duplicated, ignore.
// Remember this entity to avoid duplicates.
entryList.add(items[ISOData.ADMIN1_NAME] + current);
// We get the descriptor for administration entities
// differently based on its level.
if (level == 2) {
if (lex1DataSet == null && lex2DataSet == null) {
// Compose the file names for the lex files for
// this country
lexAdmin1File = lexDir + isoCode.getAlpha3()
+ "\\" + isoCode.getAlpha3(); //$NON-NLS-1$
lexAdmin2File = lexAdmin1File;
lexAdmin1File += "_ADMIN1_LEX.txt"; //$NON-NLS-1$
lexAdmin2File += "_ADMIN2_LEX.txt"; //$NON-NLS-1$
// Load the lex1 and lex2 files for this
// country.
// NOTE(review): both assignments below stop at "GenUtils".
lex1DataSet = GenUtils
lex2DataSet = GenUtils
// Create a method to search for an item on a
// list
// of lex data objects.
lex1List = GenUtils.createLex1List(lex1DataSet);
lex2List = GenUtils.createLex2List(lex2DataSet);
// Compose the level2 descriptor using the lex data:
// STEP 1: Do a look up in the lex 2 lists.
// Get the lex 2 object for the current entry.
Lex2Obj lex2Current = GenUtils.lex2Lookup(lex2List,
items[ISOData.ADMIN1_NAME], current);
// Get the lex 1 object to which lex2Current belongs
// to.
// NOTE(review): the argument list of lex1Lookup is cut off.
Lex1Obj lex1Current = GenUtils.lex1Lookup(lex1List,
// STEP 2: piece together the ID using the
// proper lex objects : the USA is a special
// case
if (lex1Current.getDescriptor().contains("US-")) { //$NON-NLS-1$
descriptor = lex1Current.getDescriptor()
+ "-" //$NON-NLS-1$
+ lex1Current.getFormattedIndex()
+ lex2Current.getFormattedIndexUSA();
} else {
descriptor = lex1Current.getDescriptor()
+ "-G" //$NON-NLS-1$
+ lex1Current.getFormattedIndex()
+ lex2Current.getFormattedIndex();
// Add some commens to allow reverse engineering
// or backtracking the origin of this generated
// key as much as possible.
data = "# Generated key details : " //$NON-NLS-1$
+ " Level 1 ( " //$NON-NLS-1$
+ lex1Current.getDescriptor()
+ " = " //$NON-NLS-1$
+ lex1Current.getAdmin1()
+ "). " //$NON-NLS-1$
+ "Level 2" //$NON-NLS-1$
+ lex1Current.getFormattedIndex()
+ lex2Current.getFormattedIndex()
+ " = " + lex2Current.getAdmin2() //$NON-NLS-1$
+ "). \n"; //$NON-NLS-1$
} else { // Compose descriptor for level 1 and level
// 0 data.
// Get the descriptor for this entity.
descriptor = GenUtils.getISO2Descriptor(iso2List,
current, isoCode.getAlpha2());
// If we cant find the descriptor, then look in the
// fix list.
if (descriptor.equals(current) == true) {
// Look for a descriptor in the fix file
descriptor = GenUtils.getFixCode(codefixList,
current, items[ISOData.COUNTRY_NAME]);
// If there is no descriptor still, generate
// one!
if (descriptor.equals(current) == true) {
descriptor = isoCode.getAlpha2() + "-" //$NON-NLS-1$
+ current;
data = "#" + current + "\n"; //$NON-NLS-1$ //$NON-NLS-2$
String area = DEFAULT;
String level1AreaSqKm = DEFAULT;
// Compute the area for the entry according to its level.
// NOTE(review): several argument lists in this switch are cut off
// and no break statements are visible.
switch (level) {
case 1:
if (descriptor.contains("UNK") == false) { //$NON-NLS-1$
area = getArea(current,
items[ISOData.COUNTRY_NAME], areaList,
} else {
// Use the following formula to compute
// approximate area :
area = getApproximateL1Area(current,
case 2:
// Treat the USA as a special case :
// for the USA we have a list of level 2 data.
if (descriptor.contains("US-")) { //$NON-NLS-1$
// Firt, find the ISOCode2 for this USA admin.
ISOCode2 code = GenUtils.getISO2Object(
iso2List, items[ISOData.ADMIN1_NAME],
// Second, find the area for this USA admin.
area = getUSAL2Area(items[ISOData.ADMIN2_NAME],
code.getIndex(), usaLevel2List);
if (descriptor.contains("UNK") == false) { //$NON-NLS-1$
// First, get the area in SQ KM for the level 1
// container of the current level 2 adminstratin
level1AreaSqKm = getArea(
items[ISOData.COUNTRY_NAME], areaList,
level - 1);
// Now get the approximate area in SQ KM for the
// level 2 administration
area = getApproximateL2Area(descriptor,
} else {
// Use the following formula to compute
// approximate area :
String level2PolygonArea = getPolygonArea(
descriptor, items[ISOData.ADMIN1_NAME],
level2PolygonAreaList, 2);
double ratio = 0.0;
double approxArea = 0.0;
if (Double.parseDouble(defaultPolygonArea) != 0) {
// NOTE(review): the operands of the two expressions below are missing
// (each stops at "Double"); the formula cannot be recovered from this
// copy.
ratio = Double
* Double
approxArea = Double
* ratio;
area = Double.toString(approxArea);
UtilLogger.logInformation("UNKNOWN : " //$NON-NLS-1$
+ items[ISOData.ADMIN2_NAME]
+ "," + descriptor //$NON-NLS-1$
+ "," + area); //$NON-NLS-1$
// NOTE(review): "==" compares String references here, not contents;
// confirm that identity with the DEFAULT field is what is intended.
if (defaultAreaSqKm == DEFAULT) {
defaultAreaSqKm = level1AreaSqKm;
// NOTE(review): the leading arguments of this getPolygonArea call are
// missing.
defaultPolygonArea = getPolygonArea(
level1PolygonAreaList, 1);
if (data != null) {
data += descriptor + " = " + GenUtils.format(area) //$NON-NLS-1$
+ "\n"; //$NON-NLS-1$
} else {
data = descriptor + " = " + GenUtils.format(area) //$NON-NLS-1$
+ "\n"; //$NON-NLS-1$
// If area is not known (desfault value), then comment
// it
// if (area.equals(DEFAULT)) {
// System.out
// .println("VUK : " + descriptor + " , " + current
// //$NON-NLS-1$ //$NON-NLS-2$
// + " , " + level); //$NON-NLS-1$
// }
// Write the formatted output
GenUtils.addData(propertiesFile, data);
data = null;
} // while
// Reset our location used to compute approximate area for
// unknown locations.
defaultAreaSqKm = DEFAULT;
defaultPolygonArea = DEFAULT;
// Reset all lex-related items
if (lex1DataSet != null) {
lex1DataSet = null;
if (lex2DataSet != null) {
lex2DataSet = null;
if (lex1List != null) {
lex1List = null;
if (lex2List != null) {
lex2List = null;
lexAdmin1File = null;
lexAdmin2File = null;
// We dont need the ISO code object for this country
// anymore, we are done processing it.
isoCode = null;
// Get rid of the lists of polyon area data.
if (level1PolygonAreaList != null) {
level1PolygonAreaList = null;
if (level2PolygonAreaList != null) {
level2PolygonAreaList = null;
// Reset the area list
if (areaList != null) {
areaList = null;
// Close all open resources.
// NOTE(review): no close() call on the reader or the writer is visible
// here; the writer reference is only set to null -- confirm.
if (propertiesFile != null) {
propertiesFile = null;
UtilLogger.logInformation("\t\tGenerated : " + fileName); //$NON-NLS-1$
fileName = null;
// Run garbage collection
// NOTE(review): the catch block below has no visible body.
} catch (IOException e) {
} // for
* Generate the header comment for this file -- it varies for different
* admin levels.
* @param code
* ISO 3166-1 code
* @param level
* administrative level
* @return the header for the new file.
protected String getHeader(ISOCode1 code, int level) {
final int DATA_DESCRIPTOR = 0;
final int URL = 1;
// Create the file name for the URL file.
String urlFile = workspace + GenConstants.PARAMS_DIR
+ GenConstants.URLS_DIR;
urlFile += code.getName() + ".txt"; //$NON-NLS-1$
String header = null;
header = "# " + code.getAlpha3() + "_" + level + "\n"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
header += "# This file contains area values for " + code.getName() //$NON-NLS-1$
+ "\n"; //$NON-NLS-1$
// header += Calendar.getInstance().getTime() + "\n";
// Add the copyright statement.
header += GenUtils.getCopyright();
// Add an explanation of the key format in our data files.
header += GenUtils.getKeyFormatExplanation();
header += "# This is the name of the class that will interpret the rest of the file's contents\n"; //$NON-NLS-1$
header += "RECORD_CLASSNAME =$CountryAreaLabelGraphRecord\n\n"; //$NON-NLS-1$
header += "# The ISO-3166-1 alpha3 code for the country\n"; //$NON-NLS-1$
header += "ISOKEY = " + code.getAlpha3() + "\n\n"; //$NON-NLS-1$ //$NON-NLS-2$
header += "# The administration level of the area data\n"; //$NON-NLS-1$
header += "ADMIN_LEVEL = " + level + "\n\n"; //$NON-NLS-1$ //$NON-NLS-2$
header += "# This is the date range for which the area values in this file are valid \n"; //$NON-NLS-1$
header += "# See\n"; //$NON-NLS-1$
header += "VALID = start=1900-01-01;\n\n"; //$NON-NLS-1$
// The United Nations data policy applies only to Europe :
// header += GenUtils.getUNPolicy();
header += "# This is the source of the data\n"; //$NON-NLS-1$
// Open the URL file for this country.
// UtilLogger.logInformation("url file = " + urlFile);
BufferedReader urlReader = GenUtils.openReader(urlFile);
try {
// Add all the source URLs for this country.
if (urlReader != null) {
String buffer = null;
while (GenUtils.EOF(buffer = urlReader.readLine()) != true) {
// UtilLogger.logInformation("Buffer = " + buffer);
// Tokenize
StringTokenizer tokenizer = new StringTokenizer(buffer, "*"); //$NON-NLS-1$
// We know that we expect only two tokens. Get them. Order
// matters here.
String[] tokens = new String[2];
tokens[DATA_DESCRIPTOR] = tokenizer.nextToken();
tokens[URL] = tokenizer.nextToken();
// URL sourceURL = null;
// Is this data descriptor one of our data sources (source
// URL) ?
if (GenUtils.isSourceURL(urlSet, tokens[DATA_DESCRIPTOR]) == true) {
// try {
// Is this data descriptor one of our active data
// sources ?
if (GenUtils.isActiveURL(urlStatus,
tokens[DATA_DESCRIPTOR]) == false) {
// No, it is not one an active URL, so comment
// it
// out.
// sourceURL = new URL(tokens[URL]);
// header += "# "
// + "SOURCE = "
// + URLEncoder.encode(sourceURL
// .toString(), "UTF-8") + "\n";
header += "# " + "SOURCE = " + tokens[URL] + "\n"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
} else {
// header += "SOURCE = "
// + URLEncoder.encode(sourceURL.toString(),
// "UTF-8") + "\n";
header += "SOURCE = " + tokens[URL] + "\n"; //$NON-NLS-1$ //$NON-NLS-2$
// } catch ( e) {
// UtilLogger.logInformation("Error:" + e.getMessage());
// //$NON-NLS-1$
// }
} // while
// To make it look better, add an extra new line.
header += "\n"; //$NON-NLS-1$
} catch (IOException e) {
if (level == DataTypes.LEVEL_1) {
header += "\n# State/Province\n\n"; //$NON-NLS-1$
} else if (level == DataTypes.LEVEL_2) {
header += "# Level 1 (admin 1 = e.g., state)\n"; //$NON-NLS-1$
header += "# Level 2 (admin 2 = e.g., county)\n\n"; //$NON-NLS-1$
return header;
* Find the area value for a given entity.
* @param entity
* the entity for which we want to find the area
* @param level
* the admin level
* @return the area value
protected String getArea(String entity, int level) {
final int ENTITY = 0;
final int LEVEL = 1;
final int AREA = 2;
Iterator it = areaList.iterator();
String entityStr = entity.toUpperCase();
while (it.hasNext()) {
String[] areaData = ((String)":"); //$NON-NLS-1$
// Round up area value to 1
// if (Double.parseDouble(areaData[AREA]) < 1.0) {
// // 1 SQKM is the minimum area value we return.
// areaData[AREA] = "1.0";
// }
if (areaData[ENTITY].trim().toUpperCase().equals(entityStr)) {
// We got a name match, now check both entities belong to same
// admin level
// (i.e. both are cities or both are counties, etc)
// Otherwise it is ambigous because we can have cities and
// counties with the same name
// Check both entities belong to same level
if (String.valueOf(level).equals(areaData[LEVEL].trim()) == false) {
// Ignore : same name , but different level
return areaData[AREA].trim();
return DEFAULT;
* Find the area of a polygon (in polygon units)
* @param descriptor
* @param container
* @param polygonAreaList
* @param level
* @return a polygon area
protected String getPolygonArea(String descriptor, String container,
List polygonAreaList, int level) {
final int CONTAINER = 0;
final int ADMINISTRATION = 1;
final int DESCRIPTOR = 2;
final int POLYGON_AREA = 3;
Iterator it = polygonAreaList.iterator();
String descriptorStr = descriptor.toUpperCase().trim();
String containerStr = container.toUpperCase().trim();
while (it.hasNext()) {
String match = null;
String[] areaData = ((String)":"); //$NON-NLS-1$
match = areaData[ADMINISTRATION].toUpperCase().trim();
switch (level) {
case DataTypes.LEVEL_1:
// We match based on the administration name.
match = areaData[ADMINISTRATION].toUpperCase().trim();
case DataTypes.LEVEL_2:
// We match based on the descriptor of the level 2 admin.
match = areaData[DESCRIPTOR].toUpperCase().trim();
// int endIndex = match.lastIndexOf("-");
// match = match.substring(0, endIndex);
// UtilLogger.logInformation("Match = " + match);
String areaContainer = areaData[CONTAINER].toUpperCase().trim();
if (match.equals(descriptorStr)
&& areaContainer.equals(containerStr)) {
return areaData[POLYGON_AREA].trim();
return DEFAULT;
* Find the area value for a given entity.
* @param entity
* the entity for which we want to find the area
* @param entityContainer
* @param areaList
* @param level
* the admin level
* @return the area value
protected String getArea(String entity, String entityContainer,
List areaList, int level) {
final int ENTITY = 0;
final int LEVEL = 1;
final int AREA = 2;
String[] areaData = null;
Iterator it = areaList.iterator();
String entityStr = entity.toUpperCase().trim();
entityStr = entityStr.replace("\\t", ""); //$NON-NLS-1$ //$NON-NLS-2$
String entityContainerStr = entityContainer.toUpperCase().trim();
while (it.hasNext()) {
areaData = ((String)":"); //$NON-NLS-1$
// Round up area value to 1
// if (Double.parseDouble(areaData[AREA]) < 1.0) {
// // 1 SQKM is the minimum area value we return.
// areaData[AREA] = "1.0";
// }
String[] compare = null;
// Get the entity name and remove blanks
String location = areaData[ENTITY].toUpperCase().trim();
location = location.toUpperCase().trim();
// Handle Aliases -- complex stuff :$
if (location.contains("[")) { //$NON-NLS-1$
location = location.replace("]", ""); //$NON-NLS-1$ //$NON-NLS-2$
// Tokenize
StringTokenizer tokens = new StringTokenizer(location, "["); //$NON-NLS-1$
// Get tokens
compare = new String[tokens.countTokens()];
int i = 0;
while (tokens.hasMoreTokens()) {
compare[i++] = tokens.nextToken().toUpperCase().trim();
if (compare != null) {
for (int i = 0; i < compare.length; i++) {
if (entityStr.replace("-", " ").equals( //$NON-NLS-1$ //$NON-NLS-2$
compare[i].replace("-", " ")) //$NON-NLS-1$ //$NON-NLS-2$
|| compare[i].equals(entityStr + "-1") //$NON-NLS-1$
|| compare[i].equals(entityContainerStr)) {
// Check both entities belong to same level
if (String.valueOf(level)
.equals(areaData[LEVEL].trim()) == false) {
// Ignore : same name , but different level
return areaData[AREA].trim();
} // for
} else if (entityStr.equals(location)
|| entityStr.replace("-", " ").equals(location) //$NON-NLS-1$ //$NON-NLS-2$
|| location.equals(entityStr + "-1")) { //$NON-NLS-1$
// Check both entities belong to same level
if (String.valueOf(level).equals(areaData[LEVEL].trim()) == false) {
// Ignore : same name , but different level
return areaData[AREA].trim();
} // while
return DEFAULT;
* Find the area value for a given county (level 2 admin) within the USA
* @param entity
* the entity for which we want to find the area
* @param state
* @param areaList
* @return the area value
protected String getUSAL2Area(String entity, String state, List areaList) {
final int ENTITY = 0;
final int STATE_ID = 1;
final int AREA = 2;
String[] areaData = null;
Iterator it = areaList.iterator();
String entityStr = entity.toUpperCase().trim();
entityStr = entityStr.replace("\\t", ""); //$NON-NLS-1$ //$NON-NLS-2$
String stateStr = state.toUpperCase().trim();
while (it.hasNext()) {
areaData = ((String)":"); //$NON-NLS-1$
// Get the entity name and remove blanks
String location = areaData[ENTITY].toUpperCase().trim();
location = location.toUpperCase().trim();
if (entityStr.equals(location)) {
// Check both entities belong to same state
if (stateStr.equals(areaData[STATE_ID].trim()) == false) {
// Ignore : same name , but different level
return areaData[AREA].trim();
} // while
return DEFAULT;
* Find the approximate area value for a known location.
* @param level2Container
* @param level1Container
* @param level0Container
* @param level1AreaSqKm
* @return the approximate area value
protected String getApproximateL2Area(String level2Container,
String level1Container, String level0Container,
String level1AreaSqKm) {
// Find the area in polygon units of the level 1 container
String level1PolygonArea = getPolygonArea(level1Container,
level0Container, level1PolygonAreaList, 1);
// Find the area in polygon units of the level 2 administration
String level2PolygonArea = getPolygonArea(level2Container,
level1Container, level2PolygonAreaList, 2);
// Compute the approximate area, but check division by zero first.
if (Double.parseDouble(level2PolygonArea) <= 0) {
return DEFAULT;
double ratio = Double.parseDouble(level2PolygonArea)
/ Double.parseDouble(level1PolygonArea);
String area = String
.valueOf(ratio * Double.parseDouble(level1AreaSqKm));
return area;
* Find the approximate area value for a known location.
* @param level1Container
* @param level0Container
* @param aggreateLevel1PolygonArea
* @param level1AreaSqKm
* @return the approximate area value
protected String getApproximateL1Area(String level1Container,
String level0Container, String aggregateLevel1PolygonArea,
String level0AreaSqKm) {
// Find the area in polygon units of the level 1 container
String level1PolygonArea = getPolygonArea(level1Container,
level0Container, level1PolygonAreaList, 1);
// Compute the approximate area, but check division by zero first.
if (Double.parseDouble(level1PolygonArea) <= 0) {
return DEFAULT;
double ratio = Double.parseDouble(level1PolygonArea)
/ Double.parseDouble(aggregateLevel1PolygonArea);
String area = String
.valueOf(ratio * Double.parseDouble(level0AreaSqKm));
return area;
* @return sum of all level K areas either in polygon units
protected String getAggregateArea(List areaList) {
final int AREA = 3;
Double aggregate = new Double(0.0);
if (areaList == null)
return "1.0"; //$NON-NLS-1$
Iterator it = areaList.iterator();
while (it.hasNext()) {
String[] areaData = ((String)":"); //$NON-NLS-1$
aggregate += Double.parseDouble(areaData[AREA].trim());
return Double.toString(aggregate);
* Main execution entry point.
* @param args
public static void main(String[] args) {
// final int ISO_3166_1 = 0;
// final int ISO_3166_2 = 1;
// final int FIX_FILE = 2;
// final int AREA_DATA = 3;
// final int TARGET_LIST = 4;
// final int POLYGON_AREAS = 5;
// final int LEX_DIR = 6;
// final int OUTPUT_DIR = 7;
// final int PARAMS = 8;
// if (args.length < PARAMS) {
// UtilLogger.logInformation("--Wrong arguments--"); //$NON-NLS-1$
// System.out
// .println("\tTo run, please provide the following arguments : ");
// //$NON-NLS-1$
// UtilLogger.logInformation("\t\t ISO 3166-1 file"); //$NON-NLS-1$
// UtilLogger.logInformation("\t\t ISO 3166-2 file"); //$NON-NLS-1$
// UtilLogger.logInformation("\t\t ISO fix file"); //$NON-NLS-1$
// UtilLogger.logInformation("\t\t Area file"); //$NON-NLS-1$
// UtilLogger.logInformation("\t\t Target countries file"); //$NON-NLS-1$
// UtilLogger.logInformation("\t\t Polygon areas"); //$NON-NLS-1$
// UtilLogger.logInformation("\t\t Lexicographic sorting directory");
// //$NON-NLS-1$
// UtilLogger.logInformation("\t\t Output directory"); //$NON-NLS-1$
// return;
// }
// AreaGenerator gen = new AreaGenerator(args[ISO_3166_1],
// args[ISO_3166_2], args[FIX_FILE], args[AREA_DATA],
// args[TARGET_LIST], args[POLYGON_AREAS], args[LEX_DIR],
// args[OUTPUT_DIR]);
final int CONFIG_FILE = 0;
final int PARAMS = 1;
if (args.length < PARAMS) {
UtilLogger.logInformation("\t\t--Wrong arguments--"); //$NON-NLS-1$
.println("\t\tTo run, please provide the following argument(s) : "); //$NON-NLS-1$
UtilLogger.logInformation("\t\t\t Configuration file"); //$NON-NLS-1$
AreaGenerator gen = new AreaGenerator(args[CONFIG_FILE]);;