/**
 * 
 */
package org.eclipse.stem.utility.generators;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

/*******************************************************************************
 * Copyright (c) 2006 IBM Corporation and others. All rights reserved. This
 * program and the accompanying materials are made available under the terms of
 * the Eclipse Public License v1.0 which accompanies this distribution, and is
 * available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: IBM Corporation - initial API and implementation
 ******************************************************************************/

/**
 * 
 * STEM II Data Migration/Generation Utilities : Instances of this class finds
 * out how many level 2 administrations each level 1 admin has. In other words,
 * it profiles each level 1 admin based on its population and the number of
 * level 2 administrations that belong to it.
 * 
 * To run add the following parameters to the run configuration:
 * 
 * C:\stemII\org.eclipse.stem.utility\dataMigration\input\ISO3166-1\ISO3166_1_en.txt
 * C:\stemII\org.eclipse.stem.utility\dataMigration\input\AreaPopulationData\
 * C:\stemII\org.eclipse.stem.utility\dataMigration\input\properties\Countries.txt
 * C:\stemII\org.eclipse.stem.utility\dataMigration\input\profiles\
 * 
 */
public class PopulationProfiler {

	private String populationData = null;

	private String outputDir = null;

	private String targetFile = null;

	private String iso3166_1 = null;

	/**
	 * Lists for population data.
	 */

	List targetList = null;

	List populationList = null;

	List codeList = null;

	/**
	 * String used to indicate default value for unknown areas.
	 */
	String DEFAULT = "0"; //$NON-NLS-1$

	/**
	 * Constructor
	 * 
	 * @param iso3166_1 -
	 *            data file with the ISO3166-1 ALPHA_2 and ALPHA_3 codes.
	 * @param populationData -
	 *            the directory containing population data for all countries
	 * @param targetFile -
	 *            data file where we find a list of countries for generation of
	 *            their corresponding area.properties files.
	 * @param outputDir -
	 *            the output directory for the files generated.
	 * 
	 */
	public PopulationProfiler(String iso3166_1, String populationData,
			String targetFile, String outputDir) {
		// Set values of global variables.
		this.iso3166_1 = iso3166_1;
		this.outputDir = outputDir;
		this.targetFile = targetFile;
		this.populationData = populationData;
	}

	/**
	 * Run the generator.
	 * 
	 */
	public void run() {
		// Read and keep in memory a list of the countries we want.
		targetList = GenUtils.populateList(targetFile);

		// Read and keep in memory a list of the ISO3166-1 ALPHA2, ALPHA3, and
		// numeric codes.
		codeList = GenUtils.populateList(iso3166_1);

		// Now, process each item in the target list.
		process();

	}

	/**
	 * Process our global list of target countries. For each country in list,
	 * generate its population.properties file.
	 * 
	 */
	protected void process() {

		final int CONTAINER = 0;
		final int COUNT = 1;

		/**
		 * Hash map for dealing with population data
		 */
		HashMap<String, String> pData = new HashMap<String, String>();

		// Object holding ISO3166-1 related data.
		ISOCode1 isoCode = null;

		// A writer for the new file we are creating.
		PrintWriter propertiesFile = null;

		// Name of the new properties file.
		String fileName = null;

		int size = targetList.size();

		// Iterate for every country and create its population.properties file.
		for (int i = 0; i < size; i++) {

			// Get the next country in the list.
			String file = ((String) targetList.get(i)).trim();

			// An index for unknwon or n.a. entries
			int unkCounter = 1;

			// Open the data source file for this country.
			BufferedReader reader = GenUtils.openReader(file);
			System.out.println("<<<< Processing country : " + file + " >>>>"); //$NON-NLS-1$ //$NON-NLS-2$
			try {

				if (reader != null) {

					String buffer = null;
					// Describes level
					// of data we
					// are now processing : LEVEL0, LEVEL1, or LEVEL2.
					int level = -1;
					// Record the previous level.
					int oldLevel = level - 1;
					// We take a chunk of the data [BUFFER_MIN,BUFFER_MAX] to
					// make processing more
					// efficient, we dont need all of it.
					int BUFFER_MIN = 0;
					int BUFFER_MAX = 120;

					List<String> entryList = null;

					while (GenUtils.EOF(buffer = reader.readLine()) != true) {

						// Make sure that we mark all unknown data.
						buffer = buffer.replace(",,", ",UNKNOWN,"); //$NON-NLS-1$ //$NON-NLS-2$

						// Use only part of the buffer, not all of it for
						// efficiency.
						String[] items = GenUtils
								.extract(buffer.length() < BUFFER_MAX ? buffer
										: buffer.substring(BUFFER_MIN,
												BUFFER_MAX));

						// Get the country name
						String country = items[ISOData.COUNTRY_NAME];

						// Get the ISO3166-1 info for this country
						if (isoCode == null) {
							isoCode = GenUtils.getISOCode(country, codeList);
						}

						// Read and keep in memory a list of the population data
						// for this country.
						if (populationList == null) {
							String name = populationData + country
									+ "_POPULATION.txt"; //$NON-NLS-1$
							populationList = GenUtils.populateList(name);
						}

						// Get the level of the data we are processing.
						level = GenUtils.getDataLevel(items);

						// This variable indicates a transition in our data
						// processing
						boolean transition = (level != oldLevel) ? true : false;

						if (transition == true) {
							// Keep track of previous level in order to detect
							// transitions.
							oldLevel = level;
							// Reset our repeated history
							// repeatedEntries = new String();
							entryList = new ArrayList<String>();
							if (propertiesFile != null) {
								System.out.println("\t\tGenerated : " //$NON-NLS-1$
										+ fileName);
								propertiesFile.close();
								propertiesFile = null;
								fileName = null;
							}
						}

						// If the directory does not exist for this country,
						// create it using ISO alpha3 code.
						File countryDir = new File(outputDir
								+ isoCode.getAlpha3());

						if (countryDir != null && countryDir.exists() == false) {
							// Directory does not exist. Create it.
							countryDir.mkdir();
						}

						// Create and open the new file.
						if (propertiesFile == null) {
							// Create the file name for the new properties.file.
							String alpha3 = isoCode.getAlpha3();
							fileName = countryDir + "\\" + alpha3 + "_" //$NON-NLS-1$ //$NON-NLS-2$
									+ "population.profile"; //$NON-NLS-1$
							// Create tne new population file for this country.
							propertiesFile = GenUtils.openWriter(fileName);
						}

						// The current entity we are dealing with.
						// Based on its admin level it can be a country, a
						// county, etc;
						String current = null;

						switch (level) {

						case DataTypes.LEVEL_0:
							// Do Nothinng
							continue;

						case DataTypes.LEVEL_1:
							current = items[ISOData.ADMIN1_NAME].toUpperCase();
							break;

						case DataTypes.LEVEL_2:
							current = items[ISOData.ADMIN2_NAME].toUpperCase();
							break;

						} // Switch

						// Handle UNKNOWN and N.A. entries differently.
						if (current.equals("UNKNOWN") || current.equals("N.A.")) { //$NON-NLS-1$ //$NON-NLS-2$

							if (level == 1) {
								current = isoCode.getAlpha2() + "-UNK-" //$NON-NLS-1$
										+ String.valueOf(unkCounter++);

							} else {
								// Level 2 data.
								current = isoCode.getAlpha3() + ".L2." //$NON-NLS-1$
										+ String.valueOf(unkCounter++);
							}
						}

						// Make sure this is not a repeated level 1 entry.
						if (level == 1) {

							if (GenUtils.isRepeated(current, entryList) == true) {
								// Duplicated, ignore.
								continue;
							}

							// Remember this entity to avoid duplicates.
							entryList.add(current);
						}

						// Make sure this is not a repeated level 2 entry.
						if (level == 2) {

							if (GenUtils.isRepeated(items[ISOData.ADMIN1_NAME]
									+ current, entryList) == true) {
								// Duplicated, ignore.
								continue;
							}

							// Remember this entity to avoid duplicates.
							entryList.add(items[ISOData.ADMIN1_NAME] + current);
						}

						String population = null;

						population = getPopulation(items[ISOData.ADMIN1_NAME],
								items[ISOData.COUNTRY_NAME], level);

						// increase count for the container of this entity
						String pVal = pData.get(items[ISOData.ADMIN1_NAME]);

						if (pVal == null) {

							pData.put(items[ISOData.ADMIN1_NAME], population
									+ ":" + 0); //$NON-NLS-1$
							continue;
						}

						// Tokenize
						StringTokenizer tokenizer = new StringTokenizer(pVal,
								":"); //$NON-NLS-1$

						// We know that we expect only two tokens. Get them.
						// Order matters here.
						String[] tokens = new String[2];
						// This is the location
						tokens[CONTAINER] = tokenizer.nextToken().toUpperCase();
						// This is the count of entities
						tokens[COUNT] = tokenizer.nextToken();

						int count = Integer.parseInt(tokens[1]);
						count++;

						pVal = tokens[CONTAINER] + ":" + count; //$NON-NLS-1$

						pData.put(items[ISOData.ADMIN1_NAME], pVal);

						System.out.println("Counted : " //$NON-NLS-1$
								+ items[ISOData.ADMIN1_NAME] + " ADMIN 2 : " //$NON-NLS-1$
								+ items[ISOData.ADMIN2_NAME]);

					} // while loop

					// Now write the results
					Iterator it = pData.keySet().iterator();

					while (it.hasNext()) {

						String key = (String) it.next();
						System.out.println("Key is " + key); //$NON-NLS-1$

						String value = pData.get(key);
						System.out.println("Value is : " + value); //$NON-NLS-1$

						GenUtils.addData(propertiesFile, key + ":" + value //$NON-NLS-1$
								+ "\n"); //$NON-NLS-1$

					}

					// We dont need the ISO code object for this country
					// anymore, we are done processing it.
					isoCode = null;

					// Reset the population list
					if (populationList != null) {
						populationList.clear();
						populationList = null;
					}

					// Reset teh hash map
					pData.clear();

					// Close all open resources.
					reader.close();

					if (propertiesFile != null) {
						propertiesFile.close();
						propertiesFile = null;
						System.out.println("\t\tGenerated : " + fileName); //$NON-NLS-1$
						fileName = null;
					}

					// Run garbage collection
					System.gc();
				}

			} catch (IOException e) {
				e.printStackTrace();
			}

		} // For loop

	}

	/**
	 * Find the population value for a given entity.
	 * 
	 * @param entity
	 *            a location
	 * @param level
	 *            administrative level
	 * 
	 * @return the population value for a location
	 */
	protected String getPopulation(String entity, int level) {

		final int ENTITY = 0;

		final int LEVEL = 1;

		final int POPULATION = 2;

		Iterator it = populationList.iterator();

		String entityStr = entity.toUpperCase();

		while (it.hasNext()) {

			String[] popData = ((String) it.next()).split(":"); //$NON-NLS-1$

			if (entityStr.equals(popData[ENTITY].trim().toUpperCase())) {

				// Check both entities belong to same level
				if (String.valueOf(level).equals(popData[LEVEL].trim()) == false) {
					// Ignore : same name , but different level
					continue;
				}

				return popData[POPULATION].trim();
			}
		}
		return DEFAULT;
	}

	/**
	 * Find the population value for a given entity.
	 * 
	 * @param entity
	 *            a location
	 * 
	 * @param entityContainer
	 *            a container for the location (entity)
	 * 
	 * @return the population value for a location
	 */
	protected String getPopulation(String entity, String entityContainer,
			int level) {

		final int ENTITY = 0;

		final int LEVEL = 1;

		final int POPULATION = 2;

		String[] popData = null;

		Iterator it = populationList.iterator();

		String entityStr = entity.toUpperCase().trim();

		String entityContainerStr = entityContainer.toUpperCase();

		while (it.hasNext()) {

			popData = ((String) it.next()).split(":"); //$NON-NLS-1$

			String[] compare = null;

			// Get the entity name and remove blanks
			String location = popData[ENTITY];

			location = location.replace("\\t", ""); //$NON-NLS-1$ //$NON-NLS-2$

			location = location.toUpperCase().trim();

			// Handle Aliases -- complex stuff :$
			if (location.contains("[")) { //$NON-NLS-1$

				location = location.replace("]", ""); //$NON-NLS-1$ //$NON-NLS-2$

				// Tokenize
				StringTokenizer tokens = new StringTokenizer(location, "["); //$NON-NLS-1$

				// Get tokens -> order matters here!
				compare = new String[tokens.countTokens()];
				int i = 0;
				while (tokens.hasMoreTokens()) {
					compare[i++] = tokens.nextToken().toUpperCase().trim();
				}
			}

			if (compare != null) {
				for (int i = 0; i < compare.length; i++) {
					if (entityStr.replace("-", " ").equals( //$NON-NLS-1$ //$NON-NLS-2$
							compare[i].replace("-", " ")) //$NON-NLS-1$ //$NON-NLS-2$
							|| compare[i].equals(entityStr + "-1") //$NON-NLS-1$
							|| compare[i].equals(entityContainerStr)) {

						return popData[POPULATION].trim();
					}

				}
			} else if (entityStr.equals(location)
					|| entityStr.replace("-", " ").equals(location) //$NON-NLS-1$ //$NON-NLS-2$
					|| location.equals(entityStr + "-1")) { //$NON-NLS-1$

				// Check both entities belong to same level
				if (String.valueOf(level).equals(popData[LEVEL].trim()) == false) {
					// Ignore : same name , but different level
					continue;
				}

				return popData[POPULATION].trim();

			}

		} // while

		return DEFAULT;
	}

	/**
	 * Main execution entry point.
	 * 
	 * @param args
	 * 
	 */
	public static void main(String[] args) {
		final int ISO_3166_1 = 0;
		final int POPULATION_DATA = 1;
		final int TARGET_LIST = 2;
		final int OUTPUT_DIR = 3;
		final int PARAMS = 4;

		if (args.length < PARAMS) {
			System.out.println("--Wrong arguments--"); //$NON-NLS-1$
			System.out
					.println("\tTo run, please provide the following arguments : "); //$NON-NLS-1$
			System.out.println("\t\t ISO 3166-1 file"); //$NON-NLS-1$
			System.out.println("\t\t Population data file"); //$NON-NLS-1$
			System.out.println("\t\t Target countries file"); //$NON-NLS-1$			
			System.out.println("\t\t Output directory"); //$NON-NLS-1$								
			return;
		}

		PopulationProfiler profiler = new PopulationProfiler(args[ISO_3166_1],
				args[POPULATION_DATA], args[TARGET_LIST], args[OUTPUT_DIR]);
		profiler.run();
	}

}
