/*******************************************************************************
* Copyright (c) 2019 Dortmund University of Applied Sciences and Arts.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* FH Dortmund - initial API and implementation
*******************************************************************************/
package org.eclipse.app4mc.gsoc_rta;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.eclipse.app4mc.amalthea.model.Amalthea;
import org.eclipse.app4mc.amalthea.model.CallSequenceItem;
import org.eclipse.app4mc.amalthea.model.InterProcessStimulus;
import org.eclipse.app4mc.amalthea.model.InterProcessTrigger;
import org.eclipse.app4mc.amalthea.model.Label;
import org.eclipse.app4mc.amalthea.model.LabelAccess;
import org.eclipse.app4mc.amalthea.model.LabelAccessEnum;
import org.eclipse.app4mc.amalthea.model.Process;
import org.eclipse.app4mc.amalthea.model.ProcessingUnit;
import org.eclipse.app4mc.amalthea.model.PuType;
import org.eclipse.app4mc.amalthea.model.Runnable;
import org.eclipse.app4mc.amalthea.model.SetEvent;
import org.eclipse.app4mc.amalthea.model.Task;
import org.eclipse.app4mc.amalthea.model.TaskRunnableCall;
import org.eclipse.app4mc.amalthea.model.Time;
import org.eclipse.app4mc.amalthea.model.TimeUnit;
import org.eclipse.app4mc.amalthea.model.util.FactoryUtil;
import org.eclipse.app4mc.amalthea.model.util.SoftwareUtil;
import org.eclipse.emf.common.util.EList;
public class Contention {
/*
 * This class computes the worst-case latency for each task when memory
 * contention occurs.
 */
private final int[] ia;
private final Amalthea model;
private final int[] flagArray;
private final HashMap<Task, List<Label>[]> gpuLabels = new HashMap<>();
public Contention(final int[] iap, final Amalthea modelp) {
this.ia = iap;
this.model = modelp;
if (iap == null) {
this.flagArray = null;
} else {
this.flagArray = new int[iap.length];
}
}
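/*
 * Usage sketch (hedged, not part of the original API documentation): given a
 * loaded Amalthea model and a task-to-PU mapping array in which index 6
 * denotes the GPU (as in the example below), the class would be driven
 * roughly like this. "mapping", "model", "someCpuTask" and "someGpuTask"
 * are hypothetical variables supplied by the caller.
 *
 *   final int[] mapping = { 5, 1, 5, 0, 1, 0, 2, 1, 2, 1, 6, 3, 4, 6 };
 *   final Contention con = new Contention(mapping, model);
 *   final Time cpuContention = con.contentionForTask(someCpuTask);
 *   final Time ceContention = con.contentionForGPUCE(someGpuTask);
 */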
/**
 * Checks whether a ProcessingUnit is a CPU or a GPU.
 * @param pu the ProcessingUnit to check
 * @return true for CPU, false for GPU
 */
public static boolean identifyPUType(final ProcessingUnit pu) {
/* true = CPU; false = GPU */
return pu.getDefinition().getPuType().equals(PuType.CPU);
}
/**
 * Initializes the HashMap that stores each GPU task's labels as
 * {@code <task, List<Label>[]>} (readList = hashmap.get(task)[0];
 * writeList = hashmap.get(task)[1]) and creates a flag array of the same
 * length as the mapping array. It then loops over all PRE/POST tasks
 * (found via their InterProcessStimulus) and checks whether the triggered
 * GPU task is mapped to the GPU; if it is not (i.e., the GPU task runs on
 * a CPU), the triggering task's entry in flagArray is set to 1:
 * ia = { 5, 1, 5, 0, 1, 0, 2, 1, 2, 1, 6, 3, 4, 6 } - the mapping array
 * fa = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 } - the flag array
 *
 * @param iap the task-to-PU mapping array (also called ia)
 */
private void setUpFlagArrayAndHashMap(final int[] iap) {
final EList<Task> modelTask = this.model.getSwModel().getTasks();
/* Get the GPU task list by checking each task's first stimulus for an InterProcessStimulus. */
final List<Task> gpuTask = modelTask.stream().filter(a -> a.getStimuli().get(0) instanceof InterProcessStimulus).collect(Collectors.toList());
/* Retrieval of the triggering ('shotting') task starts here. */
for (final Task t : gpuTask) {
/* the triggering ('shotting') task is resolved from the SetEvent's process */
final List<SetEvent> shottingTaskEvent = SoftwareUtil.collectSetEvents(t, null);
final Process shottingTask = shottingTaskEvent.get(0).getProcess();
final int shotIndex = this.model.getSwModel().getTasks().indexOf(shottingTask);
final int gpuIndex = this.model.getSwModel().getTasks().indexOf(t);
if (iap[gpuIndex] != 6) {
/* PU index 6 denotes the GPU: if the GPU task stays on the GPU, no flag is set; otherwise the triggering task is flagged */
this.flagArray[shotIndex] = 1;
}
/* Creation of the HashMap for GPU tasks that may be mapped to a CPU starts here. */
final List<Label> readLabelList = new ArrayList<Label>();
final List<Label> writeLabelList = new ArrayList<Label>();
final List<CallSequenceItem> callList = SoftwareUtil.collectCalls(shottingTask);
final CallSequenceItem ipt = callList.stream().filter(a -> a instanceof InterProcessTrigger).iterator().next();
/*
 * Get the position of the InterProcessTrigger within the task, then
 * collect read labels from pre-processing and write labels from
 * post-processing (pre-processing happens before the trigger,
 * post-processing after it).
 */
final int indexforTrigger = callList.indexOf(ipt);
for (int i = 0; i < callList.size(); i++) {
Runnable thisRunnable = null;
/* Pre-processing Runnable */
if ((i < indexforTrigger) && (callList.get(i) instanceof TaskRunnableCall)) {
thisRunnable = ((TaskRunnableCall) callList.get(i)).getRunnable();
final List<LabelAccess> thisLAList = SoftwareUtil.getLabelAccessList(thisRunnable, null);
for (final LabelAccess la : thisLAList) {
if (la.getAccess().equals(LabelAccessEnum.READ)) {
readLabelList.add(la.getData());
}
}
}
/* Post-processing Runnable */
else if ((i > indexforTrigger) && (callList.get(i) instanceof TaskRunnableCall)) {
thisRunnable = ((TaskRunnableCall) callList.get(i)).getRunnable();
final List<LabelAccess> thisLAList = SoftwareUtil.getLabelAccessList(thisRunnable, null);
for (final LabelAccess la : thisLAList) {
if (la.getAccess().equals(LabelAccessEnum.WRITE)) {
writeLabelList.add(la.getData());
}
}
}
}
@SuppressWarnings("unchecked")
final List<Label>[] aryofLabelList = new ArrayList[2];
aryofLabelList[0] = readLabelList;
aryofLabelList[1] = writeLabelList;
this.gpuLabels.put(t, aryofLabelList);
// HashMap created with <Task, ArrayofLabelList>
}
}
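/*
 * Retrieval sketch (hedged; "gpuTask" is a hypothetical variable): after this
 * method has run, the label lists can be read back as
 *
 *   final List<Label> readList = this.gpuLabels.get(gpuTask)[0];
 *   final List<Label> writeList = this.gpuLabels.get(gpuTask)[1];
 */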
/** Calculates the contention time for a CPU task only.
 *
 * @param task the task under observation
 * @return the contention time
 */
public Time contentionForTask(final Task task) {
/* Initialize hashmap */
setUpFlagArrayAndHashMap(this.ia);
final Time time = FactoryUtil.createTime(BigInteger.ZERO, TimeUnit.PS);
/**
 * Constant and variable initialization; change these constants for a different model.
 *
 * The ArrayLists below are used to find the number of interfering cores and to ease debugging:
 * b1 = current task's name
 * b2 = interfering task's name
 * c1 = current task's core
 * c2 = interfering task's core
 * c3 = core definition's name (identifies the task's PU type so the appropriate formula is applied)
 * d2 = c2 with duplicates removed
 */
final ArrayList<String> b1 = new ArrayList<>();
final ArrayList<String> b2 = new ArrayList<>();
final ArrayList<String> c1 = new ArrayList<>();
final ArrayList<String> c2 = new ArrayList<>();
final ArrayList<String> c3 = new ArrayList<>();
ArrayList<String> d2 = new ArrayList<>();
/* Name of the GPU core, used to identify a task's core during the calculation. */
final String GPU = "GPU";
/* Names of the CPU cores, used to identify a task's core during the calculation;
 * identifyPUType() above cannot distinguish them because this model contains two different CPU types. */
final String A57CPU = "A57";
final String DenverCPU = "Denver";
/* Time taken to read or write one cache line (64 B), in nanoseconds. */
final double A57_Baseline = 20.0;
final double Denver_Baseline = 8.0;
final double GPU_Baseline = 3.0;
/* Increase in latency caused by a single interfering core.
 * Note: it does not matter whether the interfering core is a Denver or an A57;
 * this constant depends only on the observed CPU core's type. */
final double A57_KConstant = 20.0;
final double Denver_KConstant = 2.0;
/* Sensitivity to GPU copy-engine (CE) activity. */
final double A57_sGPU = 100.0;
final double Denver_sGPU = 20.0;
/* bGPU: 1.0 if the GPU is operating the copy engine, 0.0 otherwise.
 * We assume the worst case (WCET), so it is always 1.0 in this implementation. */
final double bGPU = 1.0;
/* Computation of the number of interfering cores starts here. */
final List<ProcessingUnit> pul = CommonUtils.getPUs(this.model);
for (final Task t : this.model.getSwModel().getTasks()) {
if (t.equals(task)) {
/* Check whether the task is flagged: a flagged PRE/POST task (its GPU task
 * is mapped to a CPU) is not considered and contributes no contention. */
final int currentTaskIndex = this.model.getSwModel().getTasks().indexOf(task);
if (this.flagArray[currentTaskIndex] == 1) {
return time;
}
for (final Label l : CommonUtils.getAccessedLabelSet(t)) {
final Set<Runnable> wL = SoftwareUtil.getWriterSetOfLabel(l, null);
for (final Runnable r : wL) {
final List<Process> lT = SoftwareUtil.getProcesses(r, null);
for (final Process lTT : lT) {
/* Find related tasks by checking which runnables access the same label;
 * record each such task in b2 and its core in c2.
 */
for (int taskIndex = 0; taskIndex < this.ia.length; taskIndex++) {
final ProcessingUnit pup = pul.get(this.ia[taskIndex]);
final String taskName = this.model.getSwModel().getTasks().get(taskIndex).getName();
final String coreName = pup.getName();
if (t.getName().equals(taskName)) {
b1.add(taskName);
c1.add(coreName);
c3.add(pup.getDefinition().getName());
}
final int b2Index = this.model.getSwModel().getTasks().indexOf(lTT);
/* Only record the task if it is not flagged (this filters out obsolete
 * PRE/POST tasks, which are assumed to disappear when the corresponding
 * GPU task is mapped to a CPU). */
if (lTT.getName().equals(taskName) && (this.flagArray[b2Index] == 0)) {
b2.add(taskName);
c2.add(coreName);
}
}
}
}
final Set<Runnable> rL = SoftwareUtil.getReadersSetOfLabel(l, null);
for (final Runnable r : rL) {
final List<Process> lT1 = SoftwareUtil.getProcesses(r, null);
for (final Process lTT1 : lT1) {
/* Find related tasks by checking which runnables access the same label;
 * record each such task in b2 and its core in c2.
 */
for (int taskIndex = 0; taskIndex < this.ia.length; taskIndex++) {
final ProcessingUnit pup = pul.get(this.ia[taskIndex]);
final String taskName = this.model.getSwModel().getTasks().get(taskIndex).getName();
final String coreName = pup.getName();
if (t.getName().equals(taskName)) {
b1.add(taskName);
c1.add(coreName);
c3.add(pup.getDefinition().getName());
}
/* Only record the task if it is not flagged (this filters out obsolete
 * PRE/POST tasks, which are assumed to disappear when the corresponding
 * GPU task is mapped to a CPU). */
final int b2Index1 = this.model.getSwModel().getTasks().indexOf(lTT1);
if (lTT1.getName().equals(taskName) && (this.flagArray[b2Index1] == 0)) {
b2.add(taskName);
c2.add(coreName);
}
}
}
}
}
/* Remove all the duplicates inside c2.*/
d2 = (ArrayList<String>) c2.stream().distinct().collect(Collectors.toList());
/* HashMap of <core, Integer>:
 * interfering CPU cores are assigned 1; the GPU and the observed task's own core get 0.
 * The sum of the values equals the number of interfering cores.
 * The resulting map looks like {Core3=1, Core4=1, Core0=0, GP10B=0}. */
final HashMap<Object, Integer> coreHashMap = new HashMap<Object, Integer>();
for (final ProcessingUnit pu : pul) {
final String puName = pu.getName();
for (final String cName : d2) {
if (cName.equals(puName)) {
if (identifyPUType(pu)) {
coreHashMap.put(puName, 1);
if (puName.equals(c1.get(0))) {
coreHashMap.put(puName, 0);
}
}
else {
coreHashMap.put(puName, 0);
}
}
}
}
/* Calculate the total number of interfering cores by summing all values in the map. */
int NoOfCore = 0;
for (final int val : coreHashMap.values()) {
NoOfCore += val;
}
/* Memory contention calculation for a CPU core:
 * Lat = baseline + K*C + sGPU*bGPU
 * where Lat is the time (in nanoseconds) needed to read or write one
 * 64 B cache line and C = NoOfCore.
 * Lat must be scaled according to the memory footprint, which happens below.
 */
double Lat = 0.0;
if (!c3.isEmpty()) {
if (c3.get(0).contains(A57CPU)) {
Lat = A57_Baseline + (A57_KConstant * NoOfCore) + (A57_sGPU * bGPU);
}
if (c3.get(0).contains(DenverCPU)) {
Lat = Denver_Baseline + (Denver_KConstant * NoOfCore) + (Denver_sGPU * bGPU);
}
/* Memory contention calculation for the GPU copy engine:
 * Lat = GPU_Baseline + 0.5*C
 * where Lat is the time (in nanoseconds) needed to transfer one
 * 64 B cache line with the copy engine and C = NoOfCore;
 * Lat is likewise scaled by the memory footprint. */
if (c3.get(0).contains(GPU)) {
Lat = GPU_Baseline + 0.5 * NoOfCore;
}
}
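/*
 * Worked example (illustrative numbers only, derived from the constants
 * above): on a Denver core with NoOfCore = 2 and bGPU = 1,
 * Lat = 8 + 2*2 + 20*1 = 32 ns per 64 B line; for the GPU copy engine with
 * the same NoOfCore, Lat = 3 + 0.5*2 = 4 ns.
 */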
/* The 64 B latency calculation ends here. */
long sumCycle = 0;
/* Determination of the task's data size (in 64 B cache lines) starts here. */
final EList<Task> modelTask = this.model.getSwModel().getTasks();
final List<Task> gpuTask = modelTask.stream().filter(a -> a.getStimuli().get(0) instanceof InterProcessStimulus).collect(Collectors.toList());
/* If the task is a GPU task, its labels may come from the HashMap. */
if (gpuTask.contains(task)) {
final List<SetEvent> shottingTaskEvent = SoftwareUtil.collectSetEvents(task, null);
final Process shottingTask = shottingTaskEvent.get(0).getProcess();
/* Check whether the triggering ('shotting') task is flagged:
 * if it is, the current GPU task runs on a CPU, so use the labels from the HashMap;
 * if not, the GPU task runs on the GPU and no further lookup is needed. */
final int shotIndex = this.model.getSwModel().getTasks().indexOf(shottingTask);
if (this.flagArray[shotIndex] == 1) {
final List<Label> readList = this.gpuLabels.get(task)[0];
final List<Label> writeList = this.gpuLabels.get(task)[1];
for (final Label l : readList) {
sumCycle += (long) Math.ceil(l.getSize().getNumberBytes() / 64.0);
}
for (final Label l : writeList) {
sumCycle += (long) Math.ceil(l.getSize().getNumberBytes() / 64.0);
}
}
}
/* Normal case: sum up all labels accessed within the task for the
 * contention calculation; this is the path used most of the time. */
else {
for (final Runnable r : SoftwareUtil.getRunnableList(t, null)) {
for (final Label rl : SoftwareUtil.getReadLabelSet(r, null)) {
sumCycle += (long) Math.ceil(rl.getSize().getNumberBytes() / 64.0);
}
for (final Label wl : SoftwareUtil.getWriteLabelSet(r, null)) {
sumCycle += (long) Math.ceil(wl.getSize().getNumberBytes() / 64.0);
}
}
}
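/*
 * Example of the cache-line scaling above (assumed label sizes): a 100 B
 * label contributes ceil(100/64) = 2 cache lines to sumCycle, a 64 B label
 * exactly 1.
 */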
/* No interfering core = no interference => no contention. */
if (NoOfCore == 0) {
Lat = 0;
}
/* The task's contention in picoseconds: Lat (ns per cache line) times the
 * number of cache lines, then converted from ns to ps. */
final double taskLat = Lat * sumCycle;
time.setValue(BigInteger.valueOf((long) taskLat * 1000L));
/* Clear the lists for the next task; print them to the console before this point to inspect them. */
b1.clear();
b2.clear();
c1.clear();
c2.clear();
d2.clear();
c3.clear();
}
}
return time;
}
// end of contentionForTask
/**
 * Calculates the contention for the GPU copy engine (CE) only, when the
 * copy engine performs copy-in and copy-out. Mostly the same as the
 * CPU-task contention, except that only written labels are considered at
 * copy-out (read labels do not need to be copied back).
 * @param task the GPU task under observation
 * @return the contention time
 */
public Time contentionForGPUCE(final Task task) {
setUpFlagArrayAndHashMap(this.ia);
final Time contentionTime = FactoryUtil.createTime(BigInteger.ZERO, TimeUnit.PS);
/**
 * Constant and variable initialization; change these constants for a different model.
 *
 * The ArrayLists below are used to find the number of interfering cores and to ease debugging:
 * b1 = current task's name
 * b2 = interfering task's name
 * c1 = current task's core
 * c2 = interfering task's core
 * c3 = core definition's name (identifies the task's PU type so the appropriate formula is applied)
 * d2 = c2 with duplicates removed
 */
final ArrayList<String> b1 = new ArrayList<>();
final ArrayList<String> b2 = new ArrayList<>();
final ArrayList<String> c1 = new ArrayList<>();
final ArrayList<String> c2 = new ArrayList<>();
final ArrayList<String> c3 = new ArrayList<>();
ArrayList<String> d2 = new ArrayList<>();
/* Name of the GPU core, used to identify a task's core during the calculation. */
final String GPU = "GPU";
/* Names of the CPU cores, used to identify a task's core during the calculation;
 * identifyPUType() above cannot distinguish them because this model contains two different CPU types. */
final String A57CPU = "A57";
final String DenverCPU = "Denver";
/* Time taken to read or write one cache line (64 B), in nanoseconds. */
final double A57_Baseline = 20.0;
final double Denver_Baseline = 8.0;
final double GPU_Baseline = 3.0;
/* Increase in latency caused by a single interfering core.
 * Note: it does not matter whether the interfering core is a Denver or an A57;
 * this constant depends only on the observed CPU core's type. */
final double A57_KConstant = 20.0;
final double Denver_KConstant = 2.0;
/* Sensitivity to GPU copy-engine (CE) activity. */
final double A57_sGPU = 100.0;
final double Denver_sGPU = 20.0;
/* bGPU: 1.0 if the GPU is operating the copy engine, 0.0 otherwise.
 * We assume the worst case (WCET), so it is always 1.0 in this implementation. */
final double bGPU = 1.0;
/* Computation of the number of interfering cores starts here. */
final List<ProcessingUnit> pul = CommonUtils.getPUs(this.model);
final EList<Task> modelTask = this.model.getSwModel().getTasks();
/* Get the GPU task list by checking each task's first stimulus for an InterProcessStimulus. */
final List<Task> gpuTask = modelTask.stream().filter(a -> a.getStimuli().get(0) instanceof InterProcessStimulus).collect(Collectors.toList());
for (final Task t : gpuTask) {
if (t.equals(task)) {
for (final Label l : CommonUtils.getAccessedLabelSet(t)) {
final Set<Runnable> wL = SoftwareUtil.getWriterSetOfLabel(l, null);
for (final Runnable r : wL) {
final List<Process> lT = SoftwareUtil.getProcesses(r, null);
for (final Process lTT : lT) {
for (int taskIndex = 0; taskIndex < this.ia.length; taskIndex++) {
final ProcessingUnit pup = pul.get(this.ia[taskIndex]);
final String taskName = this.model.getSwModel().getTasks().get(taskIndex).getName();
final String coreName = pup.getName();
/* Find related tasks by checking which runnables access the same label;
 * record each such task in b2 and its core in c2.
 */
if (t.getName().equals(taskName)) {
b1.add(taskName);
c1.add(coreName);
c3.add(pup.getDefinition().getName());
}
/* Only record the task if it is not flagged (this filters out obsolete
 * PRE/POST tasks, which are assumed to disappear when the corresponding
 * GPU task is mapped to a CPU). */
final int b2Index = this.model.getSwModel().getTasks().indexOf(lTT);
if (lTT.getName().equals(taskName) && (this.flagArray[b2Index] == 0)) {
b2.add(taskName);
c2.add(coreName);
}
}
}
}
final Set<Runnable> rL = SoftwareUtil.getReadersSetOfLabel(l, null);
for (final Runnable r : rL) {
final List<Process> lT1 = SoftwareUtil.getProcesses(r, null);
for (final Process lTT1 : lT1) {
for (int taskIndex = 0; taskIndex < this.ia.length; taskIndex++) {
final ProcessingUnit pup = pul.get(this.ia[taskIndex]);
final String taskName = this.model.getSwModel().getTasks().get(taskIndex).getName();
final String coreName = pup.getName();
/* Find related tasks by checking which runnables access the same label;
 * record each such task in b2 and its core in c2.
 */
if (t.getName().equals(taskName)) {
b1.add(taskName);
c1.add(coreName);
c3.add(pup.getDefinition().getName());
}
/* Only record the task if it is not flagged (this filters out obsolete
 * PRE/POST tasks, which are assumed to disappear when the corresponding
 * GPU task is mapped to a CPU). */
final int b2Index1 = this.model.getSwModel().getTasks().indexOf(lTT1);
if (lTT1.getName().equals(taskName) && (this.flagArray[b2Index1] == 0)) {
b2.add(taskName);
c2.add(coreName);
}
}
}
}
}
// Remove all the duplicates inside c2.
d2 = (ArrayList<String>) c2.stream().distinct().collect(Collectors.toList());
/* HashMap of <core, Integer>:
 * interfering CPU cores are assigned 1; the GPU and the observed task's own core get 0.
 * The sum of the values equals the number of interfering cores.
 * The resulting map looks like {Core3=1, Core4=1, Core0=0, GP10B=0}. */
final HashMap<Object, Integer> coreHashMap = new HashMap<Object, Integer>();
for (final ProcessingUnit pu : pul) {
final String puName = pu.getName();
for (final String cName : d2) {
if (cName.equals(puName)) {
if (identifyPUType(pu)) {
coreHashMap.put(puName, 1);
if (puName.equals(c1.get(0))) {
coreHashMap.put(puName, 0);
}
}
else {
coreHashMap.put(puName, 0);
}
}
}
}
int NoOfCore = 0;
for (final int val : coreHashMap.values()) {
NoOfCore += val;
}
/* Memory contention calculation for a CPU core:
 * Lat = baseline + K*C + sGPU*bGPU
 * where Lat is the time (in nanoseconds) needed to read or write one
 * 64 B cache line and C = NoOfCore.
 * Lat must be scaled according to the memory footprint, which happens below.
 */
double Lat = 0.0;
if (!c3.isEmpty()) {
if (c3.get(0).contains(A57CPU)) {
Lat = A57_Baseline + (A57_KConstant * NoOfCore) + (A57_sGPU * bGPU);
}
if (c3.get(0).contains(DenverCPU)) {
Lat = Denver_Baseline + (Denver_KConstant * NoOfCore) + (Denver_sGPU * bGPU);
}
/* Memory contention calculation for the GPU copy engine:
 * Lat = GPU_Baseline + 0.5*C
 * where Lat is the time (in nanoseconds) needed to transfer one
 * 64 B cache line with the copy engine and C = NoOfCore;
 * Lat is likewise scaled by the memory footprint. */
if (c3.get(0).contains(GPU)) {
Lat = GPU_Baseline + 0.5 * NoOfCore;
}
}
/* The 64 B latency calculation ends here. */
long sumCycle = 0;
/* All read and write labels are transferred by the CE at copy-in. */
final Set<Label> fullList = SoftwareUtil.getAccessedLabelSet(t, null);
for (final Label l : fullList) {
final long fullSize = l.getSize().getNumberBytes();
final long cycle = (long) Math.ceil(fullSize / 64.0);
sumCycle += cycle;
}
/* Only written labels are transferred by the CE at copy-out. */
final Set<Label> writeList = SoftwareUtil.getWriteLabelSet(t, null);
for (final Label l : writeList) {
final long fullSize = l.getSize().getNumberBytes();
final long cycle = (long) Math.ceil(fullSize / 64.0);
sumCycle += cycle;
}
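/*
 * Example with assumed label sizes: for a task reading a 128 B label and
 * writing a 64 B label, copy-in transfers ceil(128/64) + ceil(64/64) = 3
 * cache lines and copy-out adds ceil(64/64) = 1 more, so sumCycle = 4.
 */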
// No interfering core = no interference => no contention.
if (NoOfCore == 0) {
Lat = 0;
}
/* The CE's contention in picoseconds: Lat (ns per cache line) times the
 * number of cache lines, then converted from ns to ps. */
final double taskLat = Lat * sumCycle;
contentionTime.setValue(BigInteger.valueOf((long) taskLat * 1000L));
/* Clear the lists for the next task; print them to the console before this point to inspect them. */
b1.clear();
b2.clear();
c1.clear();
c2.clear();
d2.clear();
c3.clear();
}
}
return contentionTime;
}
}