// blob: 76f5fd8491c997dd00e62c59a90e829e4549f996 [file] [log] [blame]
// DataProcessor.java
package org.eclipse.stem.util.analysis;
/*******************************************************************************
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
* IBM Corporation, BfR, and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* Contributors:
* IBM Corporation - initial API and implementation and new features
* Bundesinstitut für Risikobewertung - Pajek Graph interface, new Veterinary Models
*******************************************************************************/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.eclipse.stem.core.common.util.LogInitializationException;
import org.eclipse.stem.definitions.Activator;
/**
* This class reads incidence data from files (file names keyed by location) and
* creates the new format CSV files for STEM.
* It rewrites the incidence.
* It computes and writes the SIR approximations from the incidence and the
* population data.
*/
public class DataProcessor {
/**
* Names of the result files created in the target folder by writeResults.
* Each file has one column per location that has incidence data.
*/
// reported incidence per iteration
private static final String INCIDENCE_FILE_NAME = "Incidence_2.csv";
// modeled susceptible (S) per iteration
private static final String SUSCEPTIBLE_FILE_NAME = "S_2.csv";
// modeled infectious (I) per iteration
private static final String INFECTIOUS_FILE_NAME = "I_2.csv";
// modeled recovered (R) per iteration
private static final String RECOVERED_FILE_NAME = "R_2.csv";
// population size per iteration
private static final String POPULATION_FILE_NAME = "P_2.csv";
// single data row holding the reporting fraction of the first iteration
private static final String REPORTING_FRACTION_FILE_NAME = "reportingFraction.csv";
/**
* Incidence rows per STEM location id, in the order in which they were read
* from the source files
*/
private Map<String,List<Data>> incidenceMap = new HashMap<String,List<Data>>();
/**
* Population size per STEM location id, as read from the population data file
*/
private Map<String,Double> populationMap = new HashMap<String,Double>();
/**
* Largest number of incidence rows read for any single location; used as
* the number of result rows and as the length of the fitting arrays
*/
private int maxIteration = 0;
/**
* Reference population day: offset in whole days of January 1st of the
* population data year relative to the start date of the data
*/
private long referencePopulationDay;
/**
* Fractional population change per day: the difference between the
* population size at the end and at the start of the data, divided by the
* number of days covered plus one and by the population size at the start
*/
private double fractChangePopulationPerDay;
/**
 * Runs the complete conversion: loads the population data, loads the
 * incidence files, models the S, I and R states and writes the result
 * CSV files.
 *
 * @param sourceFolder The folder that contains the incidence source files
 * @param targetFolder The folder that receives the result files
 * @param startDate The date on which the data starts
 * @param endDate The date on which the data ends
 * @param populationDataFile Path of the population data file
 * @param populationSizeStartYear Population size in the first year of the data
 * @param populationSizeEndYear Population size in the last year of the data
 * @param recoveryRate Fraction of the accumulated infectious that is moved
 *        to recovered at every time step
 * @param susceptibleRate Initially susceptible fraction of the population;
 *        the remainder is counted as recovered at the first time step
 * @param immunityLossRate Fraction of the accumulated recovered that is
 *        removed at every time step
 *
 * @throws LogInitializationException Thrown when reading the input or
 *         writing the results fails
 */
public void process(
        String sourceFolder,
        String targetFolder,
        Date startDate,
        Date endDate,
        String populationDataFile,
        long populationSizeStartYear,
        long populationSizeEndYear,
        double recoveryRate,
        double susceptibleRate,
        double immunityLossRate) throws LogInitializationException {
    // step 1: population sizes and the population trend parameters
    this.readPopulationData(populationDataFile, startDate, endDate,
            populationSizeStartYear, populationSizeEndYear);
    // step 2: incidence rows per location (needs the population map)
    this.readSourceData(sourceFolder);
    // step 3: reporting fraction search and S/I/R integration
    this.modelDiseaseState(recoveryRate, susceptibleRate, immunityLossRate);
    // step 4: one CSV file per quantity
    this.writeResults(targetFolder);
} // process
/**
 * Reads the population data file and derives the population trend
 * parameters.
 * <p>
 * The file is read line by line as <code>key = value</code> pairs; reading
 * stops at the first empty line or at the end of the file. The key
 * <code>YEAR</code> overrides the population data year, which otherwise
 * defaults to the year of <code>startDate</code>. Any other key containing
 * a '-' is taken as a location id and its value is stored in the
 * population map. Values that cannot be parsed are logged and skipped.
 * <p>
 * Afterwards the reference population day (whole days from
 * <code>startDate</code> to January 1st, 00:00, of the population data
 * year) and the fractional population change per day are computed.
 *
 * @param populationDataFile Path of the population data file
 * @param startDate The date on which the data starts
 * @param endDate The date on which the data ends
 * @param populationSizeStartYear Population size in the first year of the data
 * @param populationSizeEndYear Population size in the last year of the data
 *
 * @exception LogInitializationException
 *            Thrown when the file cannot be opened or read, or when it
 *            contains no population entry
 */
private void readPopulationData(
        String populationDataFile,
        Date startDate,
        Date endDate,
        long populationSizeStartYear,
        long populationSizeEndYear) throws LogInitializationException {
    final String POPULATION_YEAR = "YEAR";
    final long MILLIS_PER_DAY = 1000L * 60 * 60 * 24;
    File file = new File(populationDataFile);
    BufferedReader fileReader = openReader(file);
    if (fileReader == null) {
        throw new LogInitializationException(
                "Failed to open population data file " + populationDataFile);
    }
    // initialize the population data year to the first year of the data
    Calendar cal = Calendar.getInstance();
    cal.setTime(startDate);
    int populationDataYear = cal.get(Calendar.YEAR);
    try {
        String buffer = null;
        while (!EOF(buffer = fileReader.readLine())) {
            int idx = buffer.indexOf("=");
            if (idx < 0) {
                continue; // not a key=value line
            }
            String id = buffer.substring(0, idx).trim();
            String value = buffer.substring(idx + 1).trim();
            if (id.equals(POPULATION_YEAR)) {
                // extracts the population data year from the file
                try {
                    populationDataYear = Integer.parseInt(value);
                } catch (NumberFormatException ex) {
                    // keep the default year
                    Activator.logInformation("Failed to read population year", ex);
                }
            } else if (id.indexOf("-") >= 0) {
                try {
                    populationMap.put(id, Double.valueOf(value));
                } catch (NumberFormatException ex) {
                    // skip this location
                    Activator.logInformation("Failed to read population for " + id, ex);
                }
            }
        }
        if (populationMap.isEmpty()) {
            throw new LogInitializationException("Failed to read population data from " + file.getName());
        }
        // computation of reference population day
        cal = Calendar.getInstance();
        // Calendar.getInstance() carries the current time of day; clear it
        // so the offset does not depend on when the program is run
        cal.clear();
        cal.set(Calendar.YEAR, populationDataYear); // population data year
        cal.set(Calendar.DAY_OF_YEAR, 1);
        long offset = cal.getTimeInMillis() - startDate.getTime(); // in milliseconds
        referencePopulationDay = offset / MILLIS_PER_DAY;
        // computation of population change per day
        offset = endDate.getTime() - startDate.getTime(); // in milliseconds
        long offsetDays = offset / MILLIS_PER_DAY;
        double changePopulationPerDay =
                ((double) populationSizeEndYear - (double) populationSizeStartYear) / (offsetDays + 1);
        fractChangePopulationPerDay = changePopulationPerDay / populationSizeStartYear;
    } catch (IOException ex) {
        throw new LogInitializationException(ex);
    } finally {
        // always release the file handle, also on the error paths
        try {
            fileReader.close();
        } catch (IOException ignored) {
            // nothing useful can be done if closing a reader fails
        }
    }
} // readPopulationData
/**
 * Reads every file found directly in the source folder.
 * <p>
 * A file that fails to load is logged and skipped; the remaining files
 * are still processed.
 *
 * @param sourceFolder The folder of the source data
 *
 * @exception LogInitializationException
 *            Thrown when the folder cannot be listed (it does not exist
 *            or is not a directory) or when no file was processed
 *            without an error
 */
private void readSourceData(
        String sourceFolder) throws LogInitializationException {
    File folder = new File(sourceFolder);
    File[] allFiles = folder.listFiles();
    if (allFiles == null) {
        // listFiles() returns null when the path is not a readable directory
        throw new LogInitializationException("Failed to read source data from " + sourceFolder);
    }
    int numFiles = 0;
    for (int i = 0; i < allFiles.length; i++) {
        try {
            readDataFromSourceFile(allFiles[i]);
            // NOTE(review): files without ".txt" in their name are ignored by
            // readDataFromSourceFile without an exception, so they are
            // counted here as well
            numFiles++;
        } catch (LogInitializationException ex) {
            Activator.logInformation("Failed to read data from " + allFiles[i].getName(), ex);
        }
    }
    if (numFiles == 0) {
        throw new LogInitializationException("Failed to read source data from " + sourceFolder);
    }
} // readSourceData
/**
 * Reads the incidence rows of one source file.
 * <p>
 * Files whose name does not contain ".txt" are ignored. The part of the
 * file name before ".txt" is converted to a STEM id, which must have an
 * entry in the population map. The first line is skipped as a header;
 * every following line must hold at least three comma separated tokens
 * (iteration, time, incidence). Reading stops at the first empty line or
 * at the end of the file. Rows are appended to the incidence list of the
 * location; rows read before a failure stay in that list.
 *
 * @param file A source file
 *
 * @exception LogInitializationException
 *            Thrown when the file cannot be opened or read, when its
 *            name cannot be mapped to a location with a population, or
 *            when a line is malformed
 */
private void readDataFromSourceFile(
        File file) throws LogInitializationException {
    final String extension = ".txt";
    String fileName = file.getName();
    int idx = fileName.indexOf(extension);
    if (idx < 0) {
        return; // ignore
    }
    BufferedReader fileReader = openReader(file);
    if (fileReader == null) {
        throw new LogInitializationException("Failed to open file " + fileName);
    }
    try {
        String idInput = fileName.substring(0, idx);
        String id = convertID(idInput);
        if (id == null) {
            throw new LogInitializationException("Failed to convert " + idInput + " to STEM ID");
        }
        Double population = populationMap.get(id);
        if (population == null) {
            throw new LogInitializationException("No population found for " + id + " from " + idInput);
        }
        List<Data> incList = incidenceMap.get(id);
        if (incList == null) {
            incList = new ArrayList<Data>();
            incidenceMap.put(id, incList);
        }
        String buffer = null;
        fileReader.readLine(); // skip the header
        while (!EOF(buffer = fileReader.readLine())) {
            StringTokenizer tokenizer = new StringTokenizer(buffer, ","); //$NON-NLS-1$
            if (tokenizer.countTokens() < 3) {
                // report a short line as a load failure of this file instead
                // of letting an unchecked exception abort the whole run
                throw new LogInitializationException("Malformed line in " + fileName + ": " + buffer);
            }
            String iteration = tokenizer.nextToken();
            String time = tokenizer.nextToken();
            String inc = tokenizer.nextToken();
            Integer count = Integer.valueOf(iteration.trim());
            double reports = Double.parseDouble(inc);
            double pop = getPopulation(count.intValue(), population.doubleValue());
            incList.add(new Data(count, time, reports, pop));
            if (incList.size() > maxIteration) {
                maxIteration = incList.size();
            }
        }
    } catch (NumberFormatException ex) {
        // non-numeric iteration or incidence value
        throw new LogInitializationException(ex);
    } catch (IOException ex) {
        throw new LogInitializationException(ex);
    } finally {
        // always release the file handle, also on the error paths
        try {
            fileReader.close();
        } catch (IOException ignored) {
            // nothing useful can be done if closing a reader fails
        }
    }
} // readDataFromSourceFile
/**
* Estimate a reporting fraction per location and derive the S, I and R
* values of every incidence row.
* <p>
* Pass 1 (per location that has incidence data): candidate reporting
* fractions are scanned downward starting at 1.0. For each candidate the
* reported incidence is divided by the fraction and integrated over the
* rows of the location. As long as S, I and R stay non-negative the
* candidate is stored in the row; when a value turns negative the scan
* backs up and continues with a ten times smaller step, and stops once
* the smallest step fails as well. Among the candidates that pass for all
* rows, the one with the smallest absolute slope of the linear fit of
* S/population is remembered as the best fraction.
* <p>
* Pass 2: the same integration is repeated with the reporting fraction
* stored in each row and the resulting S, I and R are written to the row.
*
* @param recoveryRate Fraction of the accumulated infectious that is moved
* to recovered at every time step
* @param susceptibleRate Initially susceptible fraction; (1 - susceptibleRate)
* times the population is added to recovered at the first time step
* @param immunityLossRate Fraction of the accumulated recovered that is
* removed at every time step
*/
private void modelDiseaseState(
double recoveryRate,
double susceptibleRate,
double immunityLossRate) {
// pass 1: for all locations, search the reporting fraction
Iterator<String> iter = populationMap.keySet().iterator();
while(iter.hasNext()) {
String id = iter.next();
if(!incidenceMap.containsKey(id)) {
// ignore - no incidences for this location id
continue;
}
List<Data> dataList = incidenceMap.get(id);
// last candidate for which a time step stayed non-negative
double minFraction = 10.0;
// candidate with the flattest S curve found so far
double bestFraction = 1.0;
double minSlope = Double.MAX_VALUE;
// drop the reporting fraction until we hit negative S
// this sets the MIN reporting fraction
// valid: becomes false once the smallest step size fails, which ends the scan
boolean valid = true;
// pointOK: becomes false when the current candidate produced a negative value
boolean pointOK = true;
final int maxdelta = 1000;
// current step size, in units of 1/scale
int idelta = maxdelta;
// candidates are kept as integers; fraction = ifraction/scale
double scale = 100000;
for (int ifraction = (int)scale; ((ifraction > 1)&&valid); ifraction -= idelta) {
pointOK = true;
// reduce the step once the candidate is no larger than the step itself
if((ifraction<= idelta)&&(idelta >=10)){
idelta /= 10;
}
double fraction = ifraction/scale;
// for all iterations
// re-scale the incidence and integrate it over time
// and subtracting from the susceptible at each time step
double integratedInfectious = 0.0;
double integratedRecoveries = 0.0;
// arrays have maxIteration entries; entries beyond the rows of this
// location stay 0.0 and are passed to the fit as well
double[] ydataS = new double[maxIteration];
double[] ydataR = new double[maxIteration];
double[] xdata = new double[maxIteration];
for(int i=0; i<maxIteration; i++) {
ydataS[i] = 0.0;
ydataR[i] = 0.0;
xdata[i] = 0.0;
}
for (int i = 0; ((i < dataList.size())&&valid); i ++) {
Data data = dataList.get(i);
// estimated true incidence for this candidate
double incidence = data.reports/fraction;
double population = data.population;
integratedInfectious += incidence;
double recoveries = integratedInfectious * recoveryRate;
integratedRecoveries += recoveries;
// set the initial recovered
if(i==0){
integratedRecoveries += (1.0-susceptibleRate) * population;
}
integratedInfectious -= recoveries;
double infectious = integratedInfectious;
// loss of immunity shrinks the recovered pool
integratedRecoveries *= (1.0 - immunityLossRate);
double recovered = integratedRecoveries;
// susceptible is whatever is left of the population
double susceptible = population - (infectious + recovered);
ydataS[i] = susceptible/population;
ydataR[i] = recovered/population;
xdata[i] = i;
if((susceptible >= 0.0) &&(recovered>=0.0)&&(infectious>= 0.0)){
// this row is fine for the candidate: remember it in the row
minFraction = fraction;
data.reportingFraction = minFraction;
} else {
pointOK = false;
if(idelta > 1) {
// back up and take smaller steps
ifraction += idelta;
idelta /= 10;
} else {
// smallest step failed: end the scan for this location
valid = false;
idelta = maxdelta;
}
break;
}
// both flags are still true here (the failing branch above breaks
// on its own), so this check does not fire
if(!(valid&&pointOK)) break;
} // for i time steps
if(valid&&pointOK) {
// candidate passed for all rows: keep it if its S curve is the
// flattest so far (ties go to the later, smaller candidate)
double slope = getSlopeLinear(xdata,ydataS);
if (Math.abs(slope) <= minSlope) {
minSlope = Math.abs(slope);
bestFraction = fraction;
}
}
} // for all reporting fractions
// set the fractions
// only done when the scan ended with valid still true; the best fraction
// replaces a row's fraction when it is below 0.9 and not smaller than
// the fraction already stored in the row
for (int i = 0; ((i < dataList.size())&&valid); i ++) {
Data data = dataList.get(i);
if((bestFraction< 0.9)&&(bestFraction >= data.reportingFraction)){
data.reportingFraction = bestFraction;
}
} // for all times
} // for all locations
// pass 2: for all locations, compute S, I and R with the stored fractions
iter = populationMap.keySet().iterator();
while(iter.hasNext()) {
String id = iter.next();
if(!incidenceMap.containsKey(id)) {
// ignore - no incidences for this location id
continue;
}
List<Data> dataList = incidenceMap.get(id);
// for all iterations
// re-scale the incidence and integrate it over time
// and subtracting from the susceptible at each time step
double integratedInfectious = 0.0;
double integratedRecoveries = 0.0;
// set the data
for (int i = 0; (i < dataList.size()); i ++) {
Data data = dataList.get(i);
// NOTE(review): a row that never passed in pass 1 still holds the
// initial reportingFraction of Data here - confirm this is intended
double fraction = data.reportingFraction;
double incidence = data.reports/fraction;
double population = data.population;
integratedInfectious += incidence;
double recoveries = integratedInfectious * recoveryRate;
integratedRecoveries += recoveries;
// set the initial recovered
if(i==0){
integratedRecoveries += (1.0-susceptibleRate) * population;
}
integratedInfectious -= recoveries;
data.infectious = integratedInfectious;
integratedRecoveries *= (1.0 - immunityLossRate);
data.recovered = integratedRecoveries;
data.susceptible = population - (data.infectious + data.recovered);
} // for i time steps
} // for all locations
} // modelDiseaseState
/**
 * Writes the result CSV files to the target folder.
 * <p>
 * Six files are created: reported incidence, S, I, R, population and
 * reporting fraction. All share the header
 * <code>iteration,time,&lt;id&gt;,...</code> with one column per location
 * that has incidence data, ordered by location id. The first five files
 * get one row per iteration; the reporting fraction file gets a single
 * row that starts with <code>0,0</code> and holds the reporting fraction
 * of the first row of every location.
 * <p>
 * The iteration and time of a row are taken from the first location that
 * contributes a value to it, so they are present even when the first
 * sorted location id has no incidence data.
 *
 * @param targetFolder The target folder of the results
 *
 * @exception LogInitializationException
 *            Thrown when a result file cannot be created or written
 */
private void writeResults(
        String targetFolder) throws LogInitializationException {
    // the reporting fraction file must stay last: it is written differently
    final String[] fileNames = {
        INCIDENCE_FILE_NAME,
        SUSCEPTIBLE_FILE_NAME,
        INFECTIOUS_FILE_NAME,
        RECOVERED_FILE_NAME,
        POPULATION_FILE_NAME,
        REPORTING_FRACTION_FILE_NAME
    };
    final int fractionIdx = fileNames.length - 1;
    final Writer[] writers = new Writer[fileNames.length];
    try {
        if (!targetFolder.endsWith(File.separator)) {
            targetFolder += File.separator;
        }
        for (int k = 0; k < fileNames.length; k++) {
            writers[k] = new FileWriter(targetFolder + fileNames[k]);
        }
        String[] keys = populationMap.keySet().toArray(new String[populationMap.size()]);
        Arrays.sort(keys);
        // header: only locations that have incidence data get a column
        StringBuilder strBldr = new StringBuilder("iteration,time");
        for (int i = 0; i < keys.length; i++) {
            if (incidenceMap.containsKey(keys[i])) {
                strBldr.append(",");
                strBldr.append(keys[i]);
            }
        }
        strBldr.append("\n");
        String header = strBldr.toString();
        for (int k = 0; k < writers.length; k++) {
            writers[k].write(header);
        }
        // for all iterations
        for (int i = 0; i < maxIteration; i++) {
            StringBuilder[] lines = new StringBuilder[fractionIdx];
            for (int k = 0; k < lines.length; k++) {
                lines[k] = new StringBuilder();
            }
            StringBuilder fractionLine = new StringBuilder("0,0");
            boolean prefixWritten = false;
            // for all id (columns)
            for (int j = 0; j < keys.length; j++) {
                List<Data> dataList = incidenceMap.get(keys[j]);
                if (dataList == null) {
                    continue; // no column for this location
                }
                if (i >= dataList.size()) {
                    // NOTE(review): this ends the row, so the remaining
                    // locations get no value for this iteration - kept as is
                    break;
                }
                Data data = dataList.get(i);
                if (i == 0) {
                    fractionLine.append(",").append(data.reportingFraction);
                }
                if (!prefixWritten) {
                    // iteration and time come from the first location that
                    // actually has a column, not necessarily keys[0]
                    String prefix = data.iteration.intValue() + "," + data.time;
                    for (int k = 0; k < lines.length; k++) {
                        lines[k].append(prefix);
                    }
                    prefixWritten = true;
                }
                // same order as fileNames
                double[] values = {
                    data.reports,
                    data.susceptible,
                    data.infectious,
                    data.recovered,
                    data.population
                };
                for (int k = 0; k < lines.length; k++) {
                    lines[k].append(",").append(values[k]);
                }
            }
            for (int k = 0; k < lines.length; k++) {
                writers[k].write(lines[k].append("\n").toString());
            }
            if (i == 0) {
                writers[fractionIdx].write(fractionLine.append("\n").toString());
            }
        }
        // close on the normal path so that a failing flush/close is reported
        for (int k = 0; k < writers.length; k++) {
            writers[k].flush();
            writers[k].close();
        }
    } catch (IOException ex) {
        throw new LogInitializationException(ex);
    } finally {
        // release every file that was opened, also after a failure;
        // closing an already closed writer has no effect
        for (int k = 0; k < writers.length; k++) {
            if (writers[k] != null) {
                try {
                    writers[k].close();
                } catch (IOException ignored) {
                    // the primary error, if any, is already being reported
                }
            }
        }
    }
} // writeResults
/**
 * Looks up the STEM id that belongs to an input id.
 * <p>
 * Both ids are reduced to the part after their last '-' and compared
 * ignoring case; the first matching key of the population map is returned.
 *
 * @param inputID The location id as extracted from the file name
 *
 * @return The matching location id of the population map, or
 *         <code>null</code> when no key matches
 */
public String convertID(String inputID) {
    final String wantedCode = getAdmin2Code(inputID);
    for (String candidate : populationMap.keySet()) {
        final String candidateCode = getAdmin2Code(candidate);
        if (candidateCode.equalsIgnoreCase(wantedCode)) {
            return candidate;
        }
    }
    return null;
} // convertID
/**
 * Returns the part of a location id that follows its last '-'.
 * <p>
 * An id without any '-' is returned unchanged; an id that ends with '-'
 * yields the empty string.
 *
 * @param id A location id
 *
 * @return The text after the last '-' of the id
 */
public static String getAdmin2Code(String id) {
    final int lastDash = id.lastIndexOf('-');
    return id.substring(lastDash + 1);
} // getAdmin2Code
/**
 * Fits the given points with a LinearLeastSquaresFit and returns the
 * slope reported by the fit.
 *
 * @param xData The x values of the points
 * @param yData The y values of the points
 *
 * @return The slope of the fit
 */
private double getSlopeLinear(double[] xData, double[] yData) {
    return new LinearLeastSquaresFit(xData, yData).getSlope();
} // getSlopeLinear
/**
 * Returns the population size for a given iteration.
 * <p>
 * The reference population is scaled linearly with the distance of the
 * iteration from the reference population day:
 * p(t) = refPop * (1 + fractChangePopulationPerDay * (iteration - referencePopulationDay))
 *
 * @param iteration The iteration count (day number)
 * @param refPop The size of the population as extracted from the file
 *
 * @return The population size for the given iteration
 */
private double getPopulation(int iteration, double refPop) {
    final long daysFromReference = iteration - referencePopulationDay;
    return refPop * (1 + daysFromReference * fractChangePopulationPerDay);
} // getPopulation
/**
 * Opens a file for buffered reading.
 * <p>
 * Any exception raised while opening is printed to standard error and
 * reported to the caller as a <code>null</code> result.
 *
 * @param f The file we want to open for reading
 *
 * @return A reader for the file, or <code>null</code> when the file does
 *         not exist or cannot be opened
 */
public static BufferedReader openReader(File f) {
    BufferedReader reader = null;
    try {
        if (f.exists()) {
            reader = new BufferedReader(new FileReader(f));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return reader;
} // openReader
/**
 * Tells whether a line returned by a reader marks the end of the input.
 * <p>
 * Besides <code>null</code>, an empty line is treated as the end as well.
 *
 * @param buffer A line of data, may be <code>null</code>
 *
 * @return True if the buffer is <code>null</code> or has length zero
 */
public static boolean EOF(String buffer) {
    return buffer == null || buffer.length() == 0;
} // EOF
/**
 * One row of an input file together with the values modeled for it
 */
public static class Data {
    // ---- values taken from the input row ----

    /** Iteration count (day number) */
    public Integer iteration;

    /** Time associated with this row */
    public String time;

    /** Reported incidence */
    public double reports;

    /** Population size */
    public double population;

    // ---- values filled in by the modeling ----

    /** Reporting fraction; -1.0 until a value has been assigned */
    public double reportingFraction = -1.0;

    /** S */
    public double susceptible;

    /** I */
    public double infectious;

    /** R */
    public double recovered;

    /**
     * Creates a row from the values read from an input file.
     *
     * @param iter Iteration count (day number)
     * @param time The date of the report
     * @param reported Number of reported incidence
     * @param pop Population size
     */
    public Data(Integer iter, String time, double reported, double pop) {
        this.iteration = iter;
        this.time = time;
        this.reports = reported;
        this.population = pop;
    }

    /**
     * @return The population size stored in this row
     */
    public double getReferencePopulation() {
        return this.population;
    }

    /**
     * Sets the population size and resets the susceptible to the same value.
     *
     * @param localPopulation The population size
     */
    public void setLocalPopulation(double localPopulation) {
        this.population = localPopulation;
        this.susceptible = localPopulation; // initially everybody is susceptible
    }
} // Data
} // DataProcessor