blob: 81fce7f026e907690c5a48e734c77b8b15d7677b [file] [log] [blame]
package org.eclipse.stem.internal.data.geography.graphsynchronizer;
/*******************************************************************************
* Copyright (c) 2011 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
import java.awt.Polygon;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.eclipse.emf.common.util.URI;
import org.eclipse.emf.ecore.resource.impl.ExtensibleURIConverterImpl;
import org.eclipse.stem.internal.data.geography.graphsynchronizer.CommonBorderReader.CommonBorderRelation;
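/**
* Generates, from the STEM data, an up-to-date statistics file that lists, for each
* level 1 region (or country, where no level 1 region exists), the number of
* highest-administrative-level nodes it contains, along with a matrix counting the
* common-border edges between every pair of those regions i,j.
*/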
public class WorldSubCountryPartitioningStats {
// Most recent single-element list built by readCountries(); reassigned for every country.
List<String> countryPreIDList;
// Holders written by filterAndSortFiles(): the matching files and the sorted array it returns.
static List<File> filtered;
static File[] retVal;
// Relative path of the CSV file produced by writeStats().
private static final String LEVEL_1_STATS_FILE = "temp/data/statistics/subCountryPartioningStats.csv"; //$NON-NLS-1$
// Name of the comma separated country code file read by mapCodes().
static final String CODES_FILE = "countryCodeMap.csv"; //$NON-NLS-1$
// TODO fix this
private static final URI CODES_FILE_URI = URI.createURI("./resources/data/statistics/" + CODES_FILE);//$NON-NLS-1$
// Two letter <-> three letter country code lookups, filled by mapCodes().
static Map<String,String> countryCode2to3 = new HashMap<String,String>();
static Map<String,String> countryCode3to2 = new HashMap<String,String>();
// Sorted names of the country folders found by getAllCountriesToProcess(), and their count.
// NOTE(review): folder names are presumably three letter country codes - confirm.
static String [] countriesToProcess = null;
static int NUM_COUNTRIES = -1;
// the level 1 IDs to process (some are level 0 if no level 1 exists inside);
// sorted copy of the countryIdLevel1Map keys, assigned by populateLeve1Level2Map()
static String[] level1IDs = null;
// Square matrix indexed like level1IDs; allocated by populateLeve1Level2Map() and
// incremented by populateCommonBorderMatrix().
static int[][] commonBorderMatrix = null;
// Every entry of countriesToProcess, appended by readCountries().
static List<String> countries = new ArrayList<String>();
// Relative folder holding one sub folder per country.
static final String countryFilesPrefix = "resources/data/country/"; //$NON-NLS-1$
// Key: a level 1 id, or a country folder name when a names file held no level 1 id.
// Value: after populateLeve1Level2Map(), the highest-level ids contained in that region.
static Map<String, Set<String>> countryIdLevel1Map = new HashMap<String, Set<String>>();
// Key: country folder name. Value: first every id read by readCountryCodes(), later
// replaced by the constructor with only the ids at the country's highest admin level.
static Map<String, Set<String>> countryIdHighestMap = new HashMap<String, Set<String>>();
// Key: country folder name. Value: the single-element list created by readCountries().
static Map<String, List<String>> idsByCountry= new HashMap<String, List<String>>();
/**
* highest admin level of regions for each country
*/
static Map<String, Integer> countryLevelMap = new HashMap<String, Integer>();
/**
* number of highest admin level nodes contained in each entry of level1IDs
* (filled by populateLeve1Level2Map())
*/
//static Map<String, Integer> countryLevel0NodeCountMap = new HashMap<String, Integer>();
static Map<String, Integer> countryLevel1NodeCountMap = new HashMap<String, Integer>();
// Not referenced by any code visible in this file.
Polygon pixPoly;
/**
 * Runs the complete statistics generation as a side effect of construction.
 * <p>
 * Steps, in order: load the country code mappings, discover the country
 * folders, register every country, then for each country read all of its ids,
 * register its level 1 ids and narrow the stored id set down to the ids at the
 * country's highest admin level. Finally the per-region node counts and the
 * common border matrix are computed and written to the statistics file.
 * <p>
 * All results are kept in static fields of this class.
 */
@SuppressWarnings("nls")
public WorldSubCountryPartitioningStats() {
    mapCodes(); // map the three letter to two letter codes
    // must do this first - initialization
    getAllCountriesToProcess();
    // read each folder
    readCountries();
    System.out.println("WorldPartioningStats() Updating States for "+countriesToProcess.length+" countries");
    for (String country : countriesToProcess) {
        readCountryCodes(country);
        readLevel1CountryCodes(country);
        // now we need to replace all IDs with just the max level id's
        Set<String> allIDs = countryIdHighestMap.get(country);
        int maxAdminLevel = getHighestAdminLevel(allIDs);
        Set<String> highestIDs = getHighestAdminSet(allIDs, maxAdminLevel, country);
        // Integer.valueOf replaces the deprecated boxing constructor
        countryLevelMap.put(country, Integer.valueOf(maxAdminLevel));
        // put() overwrites the previous mapping, so no remove() is needed first
        countryIdHighestMap.put(country, highestIDs);
    } // for each country
    // get the size of the level1 regions
    populateLeve1Level2Map();
    // next get the number of common border relations between each country
    populateCommonBorderMatrix();
    writeStats();
}
/**
 * Command line entry point, meant for testing only. Constructing the class
 * performs the whole run; the JVM is then terminated with status 0.
 *
 * @param args not used
 */
public static void main(String[] args) {
    // the constructor does all the work, the instance itself is not needed
    new WorldSubCountryPartitioningStats();
    System.exit(0);
}
/**
 * Collects the names of all sub folders of {@code countryFilesPrefix} whose
 * name contains no '.' and stores them, sorted, in {@code countriesToProcess};
 * {@code NUM_COUNTRIES} is set to their count.
 * <p>
 * When the country folder is missing or cannot be listed the result is an
 * empty array rather than a failure.
 */
@SuppressWarnings("nls")
public void getAllCountriesToProcess() {
    List<String> folderNames = new ArrayList<String>();
    File countryDir = new File(countryFilesPrefix);
    // listFiles() returns null when the path is not a directory or an I/O error occurs
    File[] entries = countryDir.listFiles();
    if (entries != null) {
        for (File entry : entries) {
            if (entry.isDirectory()) {
                String name = entry.getName();
                // skip folders with a dot in their name
                if (name.indexOf(".") == -1) {
                    folderNames.add(name);
                }
            }
        }
    }
    NUM_COUNTRIES = folderNames.size();
    countriesToProcess = folderNames.toArray(new String[NUM_COUNTRIES]);
    Arrays.sort(countriesToProcess);
}
/**
 * Reads the country code file ({@code CODES_FILE_URI}) and fills the two
 * lookup maps {@code countryCode2to3} and {@code countryCode3to2}.
 * <p>
 * Each line is expected to hold at least three comma separated tokens:
 * name, two letter code, three letter code. Lines with fewer tokens (for
 * example blank lines) are skipped. I/O problems are reported on standard
 * output and leave the maps with whatever was read so far.
 */
@SuppressWarnings("nls")
public void mapCodes() {
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new InputStreamReader(getInputStreamForURI(CODES_FILE_URI)));
        String record;
        while ((record = reader.readLine()) != null) {
            StringTokenizer st = new StringTokenizer(record, ",");
            if (st.countTokens() < 3) {
                // blank or malformed line: nothing usable to map
                continue;
            }
            st.nextToken(); // country name, not needed here
            String twoLetter = st.nextToken();
            String threeLetter = st.nextToken();
            countryCode2to3.put(twoLetter, threeLetter);
            countryCode3to2.put(threeLetter, twoLetter);
        } // lines
    } catch (IOException e) {
        // catch io errors from opening the stream or readLine()
        System.out.println(" IOException error!" + e.getMessage());
    } finally {
        // the reader is still null when opening the stream failed
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                System.out.println(" IOException error!" + e.getMessage());
            }
        }
    }
} // read codes
/**
 * Counts common-border relations between the regions listed in
 * {@code level1IDs} and accumulates them in {@code commonBorderMatrix}.
 * <p>
 * For every region the owning country and that country's highest admin level
 * are determined, and every common border file whose name contains
 * {@code <country>_<level>} is scanned (files containing "USA_3" are skipped).
 * Each relation is asked for the neighbor of each id contained in the region;
 * a neighbor of level 2 or more is reduced to its level 1 parent, and the
 * cell [region][neighbor's parent] is incremented, never on the diagonal.
 */
@SuppressWarnings("nls")
public void populateCommonBorderMatrix() {
    final CommonBorderReader reader = new CommonBorderReader();
    final Map<String, List<CommonBorderRelation>> bordersByFile = reader.getCommonBorderMap();
    final String[] fileNames = bordersByFile.keySet().toArray(new String[bordersByFile.keySet().size()]);

    for (int row = 0; row < level1IDs.length; row++) {
        final String regionId = level1IDs[row];
        System.out.println("mapping "+regionId+" id "+row+" of "+level1IDs.length);
        // the highest level ids contained in this region
        final Set<String> memberIds = countryIdLevel1Map.get(regionId);
        // a level 0 id is the country itself, otherwise map the two letter prefix
        final String country = (getAdminLevel(regionId) == 0)
                ? regionId
                : countryCode2to3.get(regionId.substring(0, 2));
        // border files are keyed by country and the max level of its data
        final int maxLevel = countryLevelMap.get(country).intValue();
        final String fileKey = country + "_" + maxLevel;

        for (final String fileName : fileNames) {
            // ignore the Vermont data for now !!
            if (fileName.indexOf("USA_3") != -1) {
                continue;
            }
            if (fileName.indexOf(fileKey) < 0) {
                continue;
            }
            for (final CommonBorderRelation edge : bordersByFile.get(fileName)) {
                for (final String memberId : memberIds) {
                    final String neighborId = edge.getNeighborTo(memberId);
                    if (neighborId == null) {
                        continue;
                    }
                    // level 0 and level 1 neighbors already are their own container
                    final String neighborParent = (getAdminLevel(neighborId) >= 2)
                            ? getLevel1Parent(neighborId)
                            : neighborId;
                    // locate the neighbor's container and count the edge
                    for (int col = 0; col < level1IDs.length; col++) {
                        if (level1IDs[col].equalsIgnoreCase(neighborParent) && row != col) {
                            commonBorderMatrix[row][col]++;
                        }
                    }
                }
            }
        } // for each relation file
    } // for each level 1 id
}//populateCommonBorderMatrix
/**
 * Determines, for every key of {@code countryIdLevel1Map}, which of the
 * highest-level ids it contains and records that set and its size.
 * <p>
 * Side effects: {@code level1IDs} becomes the sorted key array,
 * {@code commonBorderMatrix} is allocated (all zero) with one row and column
 * per key, and {@code countryIdLevel1Map} / {@code countryLevel1NodeCountMap}
 * are filled.
 * <ul>
 * <li>A level 0 key (a country without level 1 regions) contains all of the
 * country's highest-level ids.</li>
 * <li>Any other key contains the highest-level ids that are the key itself or
 * start with the key followed by '-'. Matching on the '-' boundary keeps
 * "XX-1" from also claiming the nodes of "XX-10".</li>
 * </ul>
 * The JVM is terminated with status 1 when the highest-level ids of a key's
 * country cannot be found.
 */
public void populateLeve1Level2Map() {
    Set<String> idSet = countryIdLevel1Map.keySet();
    level1IDs = idSet.toArray(new String[idSet.size()]);
    Arrays.sort(level1IDs);
    int numIDs = level1IDs.length;
    System.out.println(" >>> got "+numIDs+" ids at level 1. Zeroing the Matrix");
    // a freshly allocated int array is already filled with zeros
    commonBorderMatrix = new int[numIDs][numIDs];

    for (String id : level1IDs) {
        System.out.println("processing **** "+id);
        if (getAdminLevel(id) == 0) {
            // 1. if this is a country node then there were NO level 1 nodes
            if (countryIdHighestMap.containsKey(id)) {
                Set<String> highestNodes = countryIdHighestMap.get(id);
                countryIdLevel1Map.put(id, highestNodes);
                countryLevel1NodeCountMap.put(id, Integer.valueOf(highestNodes.size()));
            } else {
                System.out.println("ERROR: id="+id+"not found in countryIdHighestMap !!");
                System.exit(1);
            }
        } else {
            // this is really a level 1 key
            // so find out what country it is
            String id2Letter = id.substring(0, 2);
            String id3Letter = countryCode2to3.get(id2Letter);
            Set<String> highestNodes = countryIdHighestMap.get(id3Letter);
            if (highestNodes == null) {
                // unknown two letter code or country never read
                System.out.println("ERROR: no highest level ids found for "+id+" (country code "+id3Letter+") !!");
                System.exit(1);
                return;
            }
            String childPrefix = id + "-";
            Set<String> containedNodes = new HashSet<String>();
            for (String node : highestNodes) {
                if (node.equals(id) || node.startsWith(childPrefix)) {
                    System.out.println(" "+id+" contains "+node);
                    containedNodes.add(node);
                }
            }
            countryIdLevel1Map.put(id, containedNodes);
            countryLevel1NodeCountMap.put(id, Integer.valueOf(containedNodes.size()));
        }
    } // all level 1 ids
}//populateLeve1Leve2Map()
/**
 * Registers every entry of {@code countriesToProcess}: the name is appended to
 * {@code countries} and mapped in {@code idsByCountry} to a new list holding
 * just that name. {@code countryPreIDList} ends up referring to the list
 * created for the last country.
 */
public void readCountries() {
    for (final String country : countriesToProcess) {
        countries.add(country);
        final List<String> seedIds = new ArrayList<String>();
        seedIds.add(country);
        countryPreIDList = seedIds;
        idsByCountry.put(country, seedIds);
    }
}
/**
 * Keeps the files whose name contains "_names.properties" after at least one
 * leading character and returns them sorted in {@link File} natural order.
 * <p>
 * The intermediate list and the result are also stored in the static fields
 * {@code filtered} and {@code retVal}.
 *
 * @param files the candidate files; may be {@code null} (as returned by
 *        {@link File#listFiles()} for an unreadable folder), which is
 *        treated like an empty array
 * @return the matching files, sorted; never {@code null}
 */
public static File[] filterAndSortFiles(File[] files) {
    filtered = new ArrayList<File>();
    if (files != null) {
        for (File candidate : files) {
            if (candidate.getName().indexOf("_names.properties") >= 1) { //$NON-NLS-1$
                filtered.add(candidate);
            }
        }
    }
    retVal = filtered.toArray(new File[filtered.size()]);
    Arrays.sort(retVal);
    return retVal;
}
/**
 * Derives the administration level of an id from its '-' separated parts.
 *
 * @param stemid the id to inspect, e.g. "US-CA"
 * @return the number of parts produced by splitting on '-', minus one
 */
public int getAdminLevel(String stemid) {
    final int partCount = stemid.split("-").length; //$NON-NLS-1$
    return partCount - 1;
}
/**
 * Reduces an id to the id of its level 1 container, i.e. its first two
 * '-' separated parts.
 *
 * @param stemid the id to reduce, e.g. "US-CA-06001"
 * @return the first two parts joined by '-' (e.g. "US-CA"); an id with fewer
 *         than two parts has no level 1 container and is returned unchanged
 */
public String getLevel1Parent(String stemid) {
    String[] parts = stemid.split("-"); //$NON-NLS-1$
    if (parts.length < 2) {
        // level 0 id: indexing parts[1] would be out of bounds
        return stemid;
    }
    return parts[0] + "-" + parts[1]; //$NON-NLS-1$
}
/**
 * Registers the level 1 ids of a country as keys of
 * {@code countryIdLevel1Map}.
 * <p>
 * Every names file in the country's folder is read and each id of admin
 * level 1 is added as a key with an empty set as value. If names files were
 * read but none of them contained a level 1 id, the country itself is added
 * as the key instead. The decision is taken once, after all files have been
 * read, so one file without level 1 ids cannot register the country while
 * another file supplies them.
 * <p>
 * The JVM is terminated with status 1 when the country folder does not exist.
 *
 * @param country name of the country folder below {@code countryFilesPrefix}
 */
public void readLevel1CountryCodes(String country) {
    NameReader nameReader = new NameReader();
    File countryDir = new File(countryFilesPrefix + country);
    if (!countryDir.isDirectory()) {
        // should never happen
        System.out.println("problem reading "+country); //$NON-NLS-1$
        System.exit(1);
        return;
    }
    File[] files = filterAndSortFiles(countryDir.listFiles());
    // tracked across ALL names files of the country
    boolean haveLevel1 = false;
    for (File countryFile : files) {
        Set<String> idSet = (nameReader.readNames(countryFile.getAbsolutePath())).keySet();
        for (String id : idSet) {
            if (getAdminLevel(id) == 1) {
                haveLevel1 = true;
                // put the empty set
                countryIdLevel1Map.put(id, new HashSet<String>());
            }
        }
    }
    if (files.length > 0 && !haveLevel1) {
        // in case there are NO level 1 ids.
        countryIdLevel1Map.put(country, new HashSet<String>());
    }
}// readLevel1CountryCodes
/**
 * Collects every id found in a country's names files into the set stored for
 * that country in {@code countryIdHighestMap} (the set is created on first
 * use). Ids equal to the country name, ignoring case, are left out; if the
 * set is still empty afterwards the country name itself is added.
 * <p>
 * The JVM is terminated with status 1 when the country folder does not exist.
 *
 * @param country name of the country folder below {@code countryFilesPrefix}
 */
public void readCountryCodes(String country) {
    final NameReader names = new NameReader();
    final Set<String> collected;
    if (!countryIdHighestMap.containsKey(country)) {
        collected = new HashSet<String>();
        countryIdHighestMap.put(country, collected);
    } else {
        collected = countryIdHighestMap.get(country);
    }

    final File folder = new File(countryFilesPrefix + country);
    if (!folder.isDirectory()) {
        // should never happen
        System.out.println("problem reading "+country); //$NON-NLS-1$
        System.exit(1);
    } else {
        for (final File namesFile : filterAndSortFiles(folder.listFiles())) {
            final Set<String> fileIds = (names.readNames(namesFile.getAbsolutePath())).keySet();
            for (final String id : fileIds) {
                if (id.equalsIgnoreCase(country)) {
                    continue; // the country's own entry is not collected
                }
                collected.add(id);
            }
        }
    }
    // a country without any other id is represented by itself
    if (collected.isEmpty()) {
        collected.add(country);
    }
}// readCountryCodes
/**
 * Selects the ids whose admin level equals {@code maxAdmin}.
 *
 * @param allIdSet the ids to filter
 * @param maxAdmin the admin level to keep
 * @param threeLetterCode value added in place of each matching id when
 *        {@code maxAdmin} is 0
 * @return a new set: the matching ids when {@code maxAdmin} is 1 or more,
 *         {@code threeLetterCode} when {@code maxAdmin} is 0 and at least one
 *         id has level 0, otherwise empty
 */
public Set<String> getHighestAdminSet(Set<String> allIdSet, int maxAdmin, String threeLetterCode) {
    final Set<String> selected = new HashSet<String>();
    for (final String stemid : allIdSet) {
        if (getAdminLevel(stemid) != maxAdmin) {
            continue;
        }
        if (maxAdmin >= 1) {
            selected.add(stemid);
        } else if (maxAdmin == 0) {
            selected.add(threeLetterCode);
        }
    }
    return selected;
}
/**
 * Finds the highest admin level among the given ids.
 *
 * @param allIdSet the ids to inspect
 * @return 2 as soon as an id starting with "US-" is met (the level 3 data,
 *         Vermont, is ignored for now); otherwise the largest admin level
 *         found, or 0 for an empty set
 */
public int getHighestAdminLevel(Set<String> allIdSet) {
    int highest = 0;
    for (final String stemid : allIdSet) {
        highest = Math.max(highest, getAdminLevel(stemid));
        if (stemid.startsWith("US-")) { //$NON-NLS-1$
            return 2; // ignore the level 3 data for now (vermont)
        }
    }
    return highest;
}
/**
 * Opens an input stream for the given URI through a new
 * {@code ExtensibleURIConverterImpl}.
 *
 * @param uri the location to open
 * @return the stream created by the converter
 * @throws IOException if the converter fails to create the stream
 */
private InputStream getInputStreamForURI(URI uri) throws IOException {
    final ExtensibleURIConverterImpl converter = new ExtensibleURIConverterImpl();
    return converter.createInputStream(uri);
}
/**
 * Writes the statistics to {@code LEVEL_1_STATS_FILE}, creating the parent
 * folders when needed.
 * <p>
 * One line is written per entry of {@code level1IDs}: the id, its node count
 * from {@code countryLevel1NodeCountMap}, then that entry's row of
 * {@code commonBorderMatrix}, all comma separated. Failures are reported on
 * standard output; the writer is closed whether or not writing succeeded.
 */
@SuppressWarnings("nls")
private static void writeStats() {
    FileWriter fw = null;
    try {
        File statsFile = new File(LEVEL_1_STATS_FILE);
        statsFile.getParentFile().mkdirs();
        fw = new FileWriter(statsFile);
        System.out.println("saving data to "+statsFile.getAbsolutePath());
        // rows
        for (int row = 0; row < level1IDs.length; row++) {
            String id = level1IDs[row];
            int nodes = countryLevel1NodeCountMap.get(id).intValue();
            StringBuilder line = new StringBuilder();
            line.append(id).append(',').append(nodes);
            // columns
            for (int col = 0; col < level1IDs.length; col++) {
                line.append(',').append(commonBorderMatrix[row][col]);
            }
            line.append('\n');
            fw.write(line.toString());
        }
        // close() flushes; closing here lets a failing close be reported below
        fw.close();
        System.out.println("DONE");
    } catch (Exception e) {
        System.out.println("Error writing file " + e.getMessage());
    } finally {
        // releases the file when a write failed; a second close() has no effect
        if (fw != null) {
            try {
                fw.close();
            } catch (IOException e) {
                System.out.println("Error closing file " + e.getMessage());
            }
        }
    }
} // writeStats
}