blob: 1a241e9a5150edb21586c155c9f15392a327630c [file] [log] [blame]
/**
*
*/
package org.eclipse.stem.utility.generators;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
/*******************************************************************************
* Copyright (c) 2006 IBM Corporation and others. All rights reserved. This
* program and the accompanying materials are made available under the terms of
* the Eclipse Public License v1.0 which accompanies this distribution, and is
* available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: IBM Corporation - initial API and implementation
******************************************************************************/
/**
 * STEM II Data Migration/Generation Utilities : instances of this utility class
 * convert data from an alternative format (the US Census layout) into Diva
 * format. It was used for a very specific purpose: to help bring the data for
 * the USA into the same format as the rest of the files in the Diva set.
 *
 * <p>
 * The converter is driven by a single configuration file, whose path is the
 * only command line argument (for example
 * {@code C:\stemII\org.eclipse.stem.utility\configuration\USA2DivaFormatConverter.config}).
 * The following configuration keys are read:
 * <ul>
 * <li>{@code WORKSPACE} - base directory used to build the default locations
 * of the two input files; required unless both {@code NAMES_LIST} and
 * {@code TARGET_LIST} are given.</li>
 * <li>{@code NAMES_LIST} - level 1 data file for the USA (historically e.g.
 * {@code C:\stemII\org.eclipse.stem.utility\dataMigration\input\USA_L1_Data\USA_L1.txt}).</li>
 * <li>{@code TARGET_LIST} - file listing the data files to convert
 * (historically e.g.
 * {@code C:\stemII\org.eclipse.stem.utility\dataMigration\input\properties\formatList.txt}).</li>
 * <li>{@code PROPERTIES_OUTPUT} - output directory prefix (historically e.g.
 * {@code C:\diva\NorthAmerica\USA\}). The literal text {@code USA} is appended
 * to this value as-is, so it is expected to end with a path separator.</li>
 * </ul>
 */
public class USA2DivaFormatConverter {

    /** Result of {@link #findContainer(String, List)} when nothing matches. */
    private static final String NOT_FOUND = "NOT FOUND"; //$NON-NLS-1$

    /** Path of the level 1 data file for the USA. */
    private String usaL1File = null;

    /** Path of the file that lists the data files to convert. */
    private String targetFile = null;

    /** Output directory prefix; "USA" is appended to it verbatim. */
    private String outputDir = null;

    /** Workspace root, normalized to end with a backslash when configured. */
    private String workspace = null;

    /**
     * Names of the data files to convert, loaded by {@link #run()}. Kept as a
     * raw list because the element type returned by GenUtils.populateList is
     * not visible from this file; elements are cast to String where used.
     */
    List targetList = null;

    /**
     * Level 1 data for the USA, loaded by {@link #run()}. Raw for the same
     * reason as {@link #targetList}.
     */
    List dataList = null;

    /**
     * Creates a converter configured from the given configuration file.
     *
     * @param configFile
     *            path of the configuration file, handed to
     *            GenUtils.loadConfigFile
     * @throws IllegalArgumentException
     *             if WORKSPACE is missing although it is needed to derive the
     *             default location of NAMES_LIST or TARGET_LIST
     */
    public USA2DivaFormatConverter(String configFile) {
        // Load all configuration variables
        HashMap<String, String> vars = GenUtils.loadConfigFile(configFile);

        workspace = vars.get("WORKSPACE"); //$NON-NLS-1$
        usaL1File = vars.get("NAMES_LIST"); //$NON-NLS-1$
        targetFile = vars.get("TARGET_LIST"); //$NON-NLS-1$
        outputDir = vars.get("PROPERTIES_OUTPUT"); //$NON-NLS-1$

        // The workspace variable should end with a slash character. The
        // backslash is kept on purpose: the GenConstants path fragments that
        // are appended below are not visible from this file.
        if (workspace != null && !workspace.endsWith("\\")) { //$NON-NLS-1$
            workspace += "\\"; //$NON-NLS-1$
        }

        // Set remaining config vars using WORKSPACE (workspace relative).
        // The workspace is only required when a default has to be derived.
        if (usaL1File == null || targetFile == null) {
            if (workspace == null) {
                throw new IllegalArgumentException(
                        "Configuration file " + configFile //$NON-NLS-1$
                                + " must define WORKSPACE unless both NAMES_LIST and TARGET_LIST are set"); //$NON-NLS-1$
            }
            String path = workspace + GenConstants.PARAMS_DIR;
            if (usaL1File == null) {
                usaL1File = path + GenConstants.USA_L1_DATA;
            }
            if (targetFile == null) {
                targetFile = path + GenConstants.FORMAT_LIST;
            }
        }
        if (outputDir == null) {
            outputDir = GenConstants.DUMMY_DIR;
        }
        targetList = null;
    }

    /**
     * Sets up and runs this generator: loads the list of files to convert and
     * the level 1 data for the USA, then converts every listed file.
     */
    public void run() {
        // Read and keep in memory the list of files we want to convert.
        targetList = GenUtils.populateList(targetFile);
        // Read level 1 data for the USA
        dataList = GenUtils.populateList(usaL1File);
        // Run the converter: US Census format -> Diva set format
        process();
    }

    /**
     * Converts every data file named in {@link #targetList} from the US Census
     * format (http://www.census.gov/geo/www/cob/co2000.html) into the Diva
     * format that the generators can handle. Both formats are fixed,
     * well-defined formats; this will not convert an arbitrary format.
     *
     * <p>
     * All converted lines go to one output file, {@code USA.txt} inside the
     * directory {@code outputDir + "USA"}. The file is opened when the first
     * data line is seen and closed once, after the last listed file, so that
     * every listed file contributes to it. Blank input lines are skipped. An
     * I/O error while reading one file is reported and processing continues
     * with the next file.
     *
     * <p>
     * Requires {@link #targetList} and {@link #dataList} to be loaded, which
     * {@link #run()} does before calling this method.
     */
    protected void process() {
        // A writer for the new file we are creating; shared by all inputs.
        PrintWriter divaFile = null;
        try {
            final int size = targetList.size();
            // For every listed file
            for (int i = 0; i < size; i++) {
                // Get the next file in the list.
                String file = ((String) targetList.get(i)).trim();
                // Open the data source file.
                BufferedReader reader = GenUtils.openReader(file);
                System.out
                        .println("\t\t<<<< USA TO DIVA FORMAT CONVERTER -- Processing Country : " + file + " >>>>"); //$NON-NLS-1$ //$NON-NLS-2$
                if (reader == null) {
                    // Nothing to read for this entry.
                    continue;
                }
                try {
                    String buffer = null;
                    while (GenUtils.EOF(buffer = reader.readLine()) != true) {
                        // A blank line carries no record; converting it
                        // would only fail on the missing columns.
                        if (buffer.trim().length() == 0) {
                            continue;
                        }
                        // Make sure that we mark all unknown data.
                        String[] items = markUnknownFields(buffer).split(","); //$NON-NLS-1$
                        // Create and open the output file on first use.
                        if (divaFile == null) {
                            divaFile = openOutputFile();
                        }
                        // Apply the formatting to the current buffer : US
                        // Census format -> Diva format, then write it.
                        GenUtils.addData(divaFile, convert2Diva(items));
                    } // while
                } catch (IOException e) {
                    System.err.println("Error while reading : " + file); //$NON-NLS-1$
                    e.printStackTrace();
                } finally {
                    // Release the input even if reading or converting failed.
                    closeReader(reader, file);
                }
            } // for
        } finally {
            // Close the target file exactly once, after the last input. It
            // is still null when no data line was read at all.
            if (divaFile != null) {
                divaFile.close();
            }
        }
    }

    /**
     * Replaces every empty field in a comma separated line by UNKNOWN. A
     * single replace pass misses the second of two adjacent empty fields
     * (",,," becomes ",UNKNOWN,,"), so the pass is repeated until no empty
     * field is left between two commas.
     */
    private static String markUnknownFields(String line) {
        String marked = line;
        while (marked.contains(",,")) { //$NON-NLS-1$
            marked = marked.replace(",,", ",UNKNOWN,"); //$NON-NLS-1$ //$NON-NLS-2$
        }
        return marked;
    }

    /**
     * Creates the output directory if needed and opens USA.txt inside it.
     * Returns whatever GenUtils.openWriter returns for that file name.
     */
    private PrintWriter openOutputFile() {
        // The directory name is the output prefix followed by the ISO alpha3
        // code of the USA.
        File countryDir = new File(outputDir + "USA"); //$NON-NLS-1$
        if (!countryDir.exists() && !countryDir.mkdirs()) {
            System.err.println("Could not create output directory : " + countryDir); //$NON-NLS-1$
        }
        // Joining through File yields the platform separator; on Windows the
        // result equals the former countryDir + "\\USA.txt".
        String fileName = new File(countryDir, "USA.txt").getPath(); //$NON-NLS-1$
        return GenUtils.openWriter(fileName);
    }

    /** Closes a reader, reporting (but not propagating) a failure to close. */
    private static void closeReader(BufferedReader reader, String file) {
        try {
            reader.close();
        } catch (IOException e) {
            System.err.println("Error while closing : " + file); //$NON-NLS-1$
            e.printStackTrace();
        }
    }

    /**
     * Converts one record, already split into fields, from US Census format
     * into a Diva format line.
     *
     * <p>
     * Field indexes are zero based. The record is treated as level 2 when
     * GenUtils.isNumeric accepts field 5, and as level 1 otherwise.
     * <ul>
     * <li>Level 1 output:
     * {@code USA,United States,<field 5>,<fields 10 and up>}</li>
     * <li>Level 2 output:
     * {@code USA,United States,<container>,<field 6 + " " + field 8>,<fields 9 and up>},
     * where the container is looked up with
     * {@link #findContainer(String, List)} from the trimmed field 4. When
     * field 8 contains "Census Area" the type is left out and field 6 is
     * written alone.</li>
     * </ul>
     * The returned line ends with a newline character.
     *
     * @param data
     *            the fields of one record in US Census format
     * @return the converted line
     * @throws IllegalArgumentException
     *             if the record has fewer than 6 fields, or is a level 2
     *             record with 7 or 8 fields (the type in field 8 is missing)
     */
    public String convert2Diva(String[] data) {
        // Useful symbolic constants
        final int STATE_ID = 4;
        final int L1_ADMIN = 5;
        final int L2_ADMIN = 6;
        final int L2_ADMIN_TYPE = 8; // VALID TYPES = BOROUGH, PARISH,COUNTY,
        // CITY. INVALID = CENSUS AREA

        if (data.length <= L1_ADMIN) {
            throw new IllegalArgumentException(
                    "Expected at least " + (L1_ADMIN + 1) //$NON-NLS-1$
                            + " fields in a US Census record but found " + data.length); //$NON-NLS-1$
        }

        // Get the level of the data we are processing.
        final int level = GenUtils.isNumeric(data[L1_ADMIN]) ? 2 : 1;
        // Get the state ID and get rid of blanks
        final String state_id = data[STATE_ID].trim();

        // The formatted prefix string
        StringBuilder divaString = new StringBuilder("USA,United States"); //$NON-NLS-1$

        // Process each element in the buffer -- ignore the first five items.
        final int length = data.length;
        for (int i = L1_ADMIN; i < length; i++) {
            // Ignore irrelevant data items that occur at both levels
            if (i == 7 || i == 8) {
                continue;
            }
            // Ignore irrelevant data that only occurs at level 1
            if (level == 1 && (i == 6 || i == 9)) {
                continue;
            }
            if (level == 2) {
                // Irrelevant data at level 2, ignore.
                if (i == L1_ADMIN) {
                    continue;
                }
                // Find the container of the level 2 administration.
                if (i == L2_ADMIN) {
                    if (length <= L2_ADMIN_TYPE) {
                        throw new IllegalArgumentException(
                                "Expected at least " + (L2_ADMIN_TYPE + 1) //$NON-NLS-1$
                                        + " fields in a level 2 US Census record but found " + length); //$NON-NLS-1$
                    }
                    divaString.append(',').append(findContainer(state_id, dataList));
                    if (!data[L2_ADMIN_TYPE].contains("Census Area")) { //$NON-NLS-1$
                        // Add the type to the name of the administration :
                        // (i.e. for Fairfax = admin and city = type, get
                        // Fairfax city)
                        divaString.append(',').append(data[L2_ADMIN]).append(' ')
                                .append(data[L2_ADMIN_TYPE]);
                        continue;
                    }
                    // Census areas fall through: the bare name is appended
                    // below without its type.
                }
            }
            divaString.append(',').append(data[i]);
        } // for

        // Finally add a new line to the string.
        divaString.append('\n');
        return divaString.toString();
    }

    /**
     * Finds the level 1 container (state) for a level 2 administration.
     *
     * <p>
     * Each list entry is expected to hold colon separated fields such as
     * {@code US:USA:US-AL:Alabama:1}: field 3 is the container name and field
     * 4 its numeric id. Ids are compared as integers, so "01" matches "1". TAB
     * characters and surrounding blanks are ignored. Entries with fewer than
     * five fields or a non-numeric id are skipped.
     *
     * @param state
     *            the id of the container (state)
     * @param dataList
     *            a list of level 1 data for the USA; every element must be a
     *            String
     * @return the upper-cased name of the level 1 container, or "NOT FOUND"
     *         when no entry matches or the given id is not a number
     */
    public String findContainer(String state, List dataList) {
        // Useful symbolic constants
        final int CONTAINER = 3;
        final int STATE_ID = 4;

        // Parse the wanted id once. Note that replace() takes a literal, so
        // the TAB character itself has to be given here, not a regex.
        int wantedId;
        try {
            wantedId = Integer.parseInt(state.replace("\t", "").trim()); //$NON-NLS-1$ //$NON-NLS-2$
        } catch (NumberFormatException e) {
            // An id that is not a number cannot match any container.
            return NOT_FOUND;
        }

        for (Object item : dataList) {
            // Locale.ENGLISH keeps the casing independent of the default
            // locale of the machine running the conversion.
            String entry = ((String) item).toUpperCase(Locale.ENGLISH).replace("\t", ""); //$NON-NLS-1$ //$NON-NLS-2$
            String[] entries = entry.split(":"); //$NON-NLS-1$
            if (entries.length <= STATE_ID) {
                // Blank or truncated entry.
                continue;
            }
            int currentId;
            try {
                currentId = Integer.parseInt(entries[STATE_ID].trim());
            } catch (NumberFormatException e) {
                // Header or otherwise malformed entry.
                continue;
            }
            if (currentId == wantedId) {
                return entries[CONTAINER].trim();
            }
        }
        return NOT_FOUND;
    }

    /**
     * Main execution entry point. Expects the path of the configuration file
     * as first argument; prints a usage message and exits with status 1 when
     * it is missing.
     *
     * @param args
     *            command line arguments
     */
    public static void main(String[] args) {
        final int CONFIG_FILE = 0;
        final int PARAMS = 1;
        if (args.length < PARAMS) {
            System.out.println("\t\t--Wrong arguments--"); //$NON-NLS-1$
            System.out
                    .println("\t\tTo run, please provide the following argument(s) : "); //$NON-NLS-1$
            System.out.println("\t\t\t Configuration file"); //$NON-NLS-1$
            System.exit(1);
        }
        USA2DivaFormatConverter converter = new USA2DivaFormatConverter(
                args[CONFIG_FILE]);
        converter.run();
    }
}