| /******************************************************************************* |
| * Copyright (c) 2019 Dortmund University of Applied Sciences and Arts. |
| * |
| * This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License 2.0 |
| * which accompanies this distribution, and is available at |
| * https://www.eclipse.org/legal/epl-2.0/ |
| * |
| * SPDX-License-Identifier: EPL-2.0 |
| * |
| * Contributors: |
| * FH Dortmund - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.app4mc.gsoc_rta; |
| |
| import java.math.BigInteger; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Set; |
| import java.util.stream.Collectors; |
| |
| import org.eclipse.app4mc.amalthea.model.Amalthea; |
| import org.eclipse.app4mc.amalthea.model.CallSequenceItem; |
| import org.eclipse.app4mc.amalthea.model.InterProcessStimulus; |
| import org.eclipse.app4mc.amalthea.model.InterProcessTrigger; |
| import org.eclipse.app4mc.amalthea.model.Label; |
| import org.eclipse.app4mc.amalthea.model.LabelAccess; |
| import org.eclipse.app4mc.amalthea.model.LabelAccessEnum; |
| import org.eclipse.app4mc.amalthea.model.Process; |
| import org.eclipse.app4mc.amalthea.model.ProcessingUnit; |
| import org.eclipse.app4mc.amalthea.model.PuType; |
| import org.eclipse.app4mc.amalthea.model.Runnable; |
| import org.eclipse.app4mc.amalthea.model.SetEvent; |
| import org.eclipse.app4mc.amalthea.model.Task; |
| import org.eclipse.app4mc.amalthea.model.TaskRunnableCall; |
| import org.eclipse.app4mc.amalthea.model.Time; |
| import org.eclipse.app4mc.amalthea.model.TimeUnit; |
| import org.eclipse.app4mc.amalthea.model.util.FactoryUtil; |
| import org.eclipse.app4mc.amalthea.model.util.SoftwareUtil; |
| import org.eclipse.emf.common.util.EList; |
| |
| public class Contention { |
| /* |
| * this class will give the worst case latency for each task when memory |
| * contention happen. |
| */ |
| |
| private final int[] ia; |
| private final Amalthea model; |
| private final int[] flagArray; |
| private final HashMap<Task, List<Label>[]> gpuLabels = new HashMap<Task, List<Label>[]>(); |
| |
| public Contention(final int[] iap, final Amalthea modelp) { |
| this.ia = iap; |
| this.model = modelp; |
| if (iap == null) { |
| this.flagArray = null; |
| } else { |
| this.flagArray = new int[iap.length]; |
| } |
| } |
| |
| /** |
| * Checking whether ProcessingUnit is GPU or CPU |
| * @param pu |
| * @return true for CPU and false for GPU |
| */ |
| |
| public static boolean identifyPUType(final ProcessingUnit pu) { |
| /* true=CPU; false=GPU; */ |
| boolean result = false; |
| |
| if (pu.getDefinition().getPuType().equals(PuType.CPU)) { |
| result = true; |
| } |
| |
| else { |
| result = false; |
| } |
| return result; |
| } |
| |
| /** |
| * Initiallize hashmap contain GPU tasks' labels hashmap <task, List<label>> (readList = hashmap.get(task)[0];) |
| * and create flagarray that have the same length with our integerArray, then loop through all the PRE_POST tasks ( via looking for InterProcessStimulus) and check whether the triggered task (GPU task) are located on GPU or not, if it is, then I will give the value of that task in flagArray equal 1. It will look like this |
| |
| ia = { 5, 1, 5, 0, 1, 0, 2, 1, 2, 1, 6, 3, 4, 6 } - our mapping array |
| fa = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,} - the flag array |
| * |
| * @param tpumapping (or ia) |
| */ |
| private void setUpFlagArrayAndHashMap(final int[] iap) { |
| final EList<Task> modelTask = this.model.getSwModel().getTasks(); |
| /* get gpu task list via checking task with InterProcessStimulus*/ |
| |
| final List<Task> gpuTask = modelTask.stream().filter(a -> a.getStimuli().get(0) instanceof InterProcessStimulus).collect(Collectors.toList()); |
| /* get shotting task start*/ |
| for (final Task t : gpuTask) { |
| /* shotting task => t.getName */ |
| final List<SetEvent> shottingTaskEvent = SoftwareUtil.collectSetEvents(t, null); |
| final Process shottingTask = shottingTaskEvent.get(0).getProcess(); |
| final int shotIndex = this.model.getSwModel().getTasks().indexOf(shottingTask); |
| final int gpuIndex = this.model.getSwModel().getTasks().indexOf(t); |
| if (iap[gpuIndex] != 6) { |
| /* If GPU is at GPU task then no flag */ |
| this.flagArray[shotIndex] = 1; |
| } |
| /* create HashMap for GPU task that mapped to CPU start here*/ |
| final List<Label> readLabelList = new ArrayList<Label>(); |
| final List<Label> writeLabelList = new ArrayList<Label>(); |
| final List<CallSequenceItem> callList = SoftwareUtil.collectCalls(shottingTask); |
| final CallSequenceItem ipt = callList.stream().filter(a -> a instanceof InterProcessTrigger).iterator().next(); |
| /* |
| * get the position of InterProcessTrigger within task taking |
| * readLabel and write labels from pre and post processing ( |
| * preprocessing happen before trigger and post_processing happen |
| * after the trigger ) |
| */ |
| final int indexforTrigger = callList.indexOf(ipt); |
| for (int i = 0; i < callList.size(); i++) { |
| Runnable thisRunnable = null; |
| /* Pre-processing Runnable */ |
| if ((i < indexforTrigger) && (callList.get(i) instanceof TaskRunnableCall)) { |
| thisRunnable = ((TaskRunnableCall) callList.get(i)).getRunnable(); |
| final List<LabelAccess> thisLAList = SoftwareUtil.getLabelAccessList(thisRunnable, null); |
| for (final LabelAccess la : thisLAList) { |
| if (la.getAccess().equals(LabelAccessEnum.READ)) { |
| readLabelList.add(la.getData()); |
| } |
| } |
| } |
| /* Post-processing Runnable */ |
| else if ((i > indexforTrigger) && (callList.get(i) instanceof TaskRunnableCall)) { |
| thisRunnable = ((TaskRunnableCall) callList.get(i)).getRunnable(); |
| final List<LabelAccess> thisLAList = SoftwareUtil.getLabelAccessList(thisRunnable, null); |
| for (final LabelAccess la : thisLAList) { |
| if (la.getAccess().equals(LabelAccessEnum.WRITE)) { |
| writeLabelList.add(la.getData()); |
| } |
| } |
| } |
| } |
| |
| @SuppressWarnings("unchecked") |
| final List<Label>[] aryofLabelList = new ArrayList[2]; |
| aryofLabelList[0] = readLabelList; |
| aryofLabelList[1] = writeLabelList; |
| this.gpuLabels.put(t, aryofLabelList); |
| // HashMap created with <Task, ArrayofLabelList> |
| } |
| |
| } |
| |
| /** Calculating contention for CPU task only |
| * |
| * @param task |
| * @return time |
| */ |
| |
| public Time contentionForTask(final Task task) { |
| /* Initialize hashmap */ |
| setUpFlagArrayAndHashMap(this.ia); |
| |
| final Time time = FactoryUtil.createTime(BigInteger.ZERO, TimeUnit.PS); |
| /** |
| * Constant and variable init, change those constant for different model number |
| * |
| * The arrayList below are used to find number of core interfering and also for easier debugging |
| * b1 = current task's name |
| * b2 = intefering task's name |
| * c1 = current task's core |
| * c2 = intefering task's core |
| * c3 = core definition's name (to check is task PU to apply the appropiate calculation) |
| * d2 = c2 but reduced duplication |
| */ |
| final ArrayList<String> b1 = new ArrayList<>(); |
| final ArrayList<String> b2 = new ArrayList<>(); |
| final ArrayList<String> c1 = new ArrayList<>(); |
| final ArrayList<String> c2 = new ArrayList<>(); |
| final ArrayList<String> c3 = new ArrayList<>(); |
| ArrayList<String> d2 = new ArrayList<>(); |
| |
| /* name of GPU core, use for identification task's core while calculating */ |
| final String GPU = "GPU"; |
| /* name of CPU core, use for identification task's core while calculating |
| * can't use the identifyPU type above to distinguished because we have 2 different CPU in this model*/ |
| final String A57CPU = "A57"; |
| final String DenverCPU = "Denver"; |
| |
| /* time taken to read or write a cacheLine (64B)*/ |
| final double A57_Baseline = 20.0; |
| final double Denver_Baseline = 8.0; |
| final double GPU_Baseline = 3.0; |
| /*increase in latency operated by a single interfering core. |
| * Do note: it does not matter if the interfering core is Denver or A57 |
| * : this number only depends on the observed CPU core (CPUtype)*/ |
| |
| |
| final double A57_KConstant = 20.0; |
| final double Denver_KConstant = 2.0; |
| |
| /* sensibility to GPU CE activity.*/ |
| final double A57_sGPU = 100.0; |
| final double Denver_sGPU = 20.0; |
| |
| /*bGPU = boolean, 1: if GPU is operating the copy engine, 0 other wise |
| * We are assuming WCET so this will always be 1 in this implementation*/ |
| final double bGPU = 1.0; |
| |
| /*Computation to get NUMBER OF CORE intefering start*/ |
| |
| final List<ProcessingUnit> pul = CommonUtils.getPUs(this.model); |
| for (final Task t : this.model.getSwModel().getTasks()) { |
| if (t.equals(task)) { |
| |
| /* Checking if task is flagged or not, because |
| * if GPU task is on CPU => PREPOST will not be considered */ |
| |
| final int currentTaskIndex = this.model.getSwModel().getTasks().indexOf(task); |
| if (this.flagArray[currentTaskIndex] == 1) { |
| return time; |
| } |
| |
| for (final Label l : CommonUtils.getAccessedLabelSet(t)) { |
| |
| final Set<Runnable> wL = SoftwareUtil.getWriterSetOfLabel(l, null); |
| for (final Runnable r : wL) { |
| final List<Process> lT = SoftwareUtil.getProcesses(r, null); |
| for (final Process lTT : lT) { |
| /* get related task through checking which runnable are accessing the same label |
| * put the task that contain those runnable into b2 and the core of that task in c2 |
| */ |
| for (int taskIndex = 0; taskIndex < this.ia.length; taskIndex++) { |
| final ProcessingUnit pup = pul.get(this.ia[taskIndex]); |
| final String taskName = this.model.getSwModel().getTasks().get(taskIndex).getName(); |
| final String coreName = pup.getName(); |
| if (t.getName().equals(taskName)) { |
| b1.add(taskName); |
| c1.add(coreName); |
| c3.add(pup.getDefinition().getName()); |
| } |
| final int b2Index = this.model.getSwModel().getTasks().indexOf(lTT); |
| /*only put that task in our array if task isn't flagged (check for |
| * unnecessary PRE_POST task, we assume those task are gone when |
| * corresponded GPU task is mapped to CPU)*/ |
| if (lTT.getName().equals(taskName) && (this.flagArray[b2Index] == 0)) { |
| b2.add(taskName); |
| c2.add(coreName); |
| } |
| } |
| } |
| } |
| |
| |
| final Set<Runnable> rL = SoftwareUtil.getReadersSetOfLabel(l, null); |
| for (final Runnable r : rL) { |
| final List<Process> lT1 = SoftwareUtil.getProcesses(r, null); |
| for (final Process lTT1 : lT1) { |
| /* get related task through checking which runnable are accessing the same label |
| * put the task that contain those runnable into b2 and the core of that task in c2 |
| */ |
| for (int taskIndex = 0; taskIndex < this.ia.length; taskIndex++) { |
| final ProcessingUnit pup = pul.get(this.ia[taskIndex]); |
| |
| final String taskName = this.model.getSwModel().getTasks().get(taskIndex).getName(); |
| final String coreName = pup.getName(); |
| if (t.getName().equals(taskName)) { |
| b1.add(taskName); |
| c1.add(coreName); |
| c3.add(pup.getDefinition().getName()); |
| |
| } |
| /*only put that task in our array if task isn't flagged (check for |
| * unnecessary PRE_POST task, we assume those task are gone when |
| * corresponded GPU task is mapped to CPU)*/ |
| |
| final int b2Index1 = this.model.getSwModel().getTasks().indexOf(lTT1); |
| |
| if (lTT1.getName().equals(taskName) && (this.flagArray[b2Index1] == 0)) { |
| b2.add(taskName); |
| c2.add(coreName); |
| } |
| } |
| } |
| } |
| |
| } |
| |
| /* Remove all the duplicates inside c2.*/ |
| d2 = (ArrayList<String>) c2.stream().distinct().collect(Collectors.toList()); |
| |
| /* Hashmap of <Core, Integer> |
| * Assign value for other task as 1, GPU task and observed task as 0 |
| * total of value in integer will be equal to number of core intefering |
| * Output of hashMap look like this {Core3=1, Core4=1, Core0=0, GP10B=0}*/ |
| |
| final HashMap<Object, Integer> coreHashMap = new HashMap<Object, Integer>(); |
| for (final ProcessingUnit pu : pul) { |
| final String puName = pu.getName(); |
| for (final String cName : d2) { |
| if (cName.equals(puName)) { |
| if (identifyPUType(pu)) { |
| coreHashMap.put(puName, 1); |
| if (puName.equals(c1.get(0))) { |
| coreHashMap.put(puName, 0); |
| } |
| } |
| |
| else { |
| coreHashMap.put(puName, 0); |
| } |
| } |
| } |
| } |
| |
| /*calculate total number of core via summing up all value in hashmap*/ |
| int NoOfCore = 0; |
| for (final int val : coreHashMap.values()) { |
| NoOfCore += val; |
| } |
| /*Memory contention calculation for CPU |
| * Lat = baseline + K*C + sGPU*bGPU |
| * Lat = time necessary to read/write 64B ( nanosecond ) |
| * 64B I repeat. Scale the Lat accordingly to the memory footprint size |
| * NoOfCore = C in the formula above. |
| */ |
| double Lat = 0.0; |
| |
| if (!c3.isEmpty()) { |
| if (c3.get(0).contains(A57CPU)) { |
| Lat = A57_Baseline + (A57_KConstant * NoOfCore) + (A57_sGPU * bGPU); |
| } |
| if (c3.get(0).contains(DenverCPU)) { |
| Lat = Denver_Baseline + (Denver_KConstant * NoOfCore) + (Denver_sGPU * bGPU); |
| } |
| |
| /*Memory contention calculation for GPU |
| * Lat = GPU_Baseline + 0.5*C |
| * Lat = time necessary to transfer 64B with copy engine ( nanosecond ) |
| * 64B I repeat. Scale the Lat accordingly to the memory footprint size |
| * NoOfCore = C in the formula above. */ |
| |
| if (c3.get(0).contains(GPU)) { |
| Lat = GPU_Baseline + 0.5 * NoOfCore; |
| } |
| } |
| |
| /* Latency for 64B calculation end here*/ |
| |
| long sumCycle = 0; |
| |
| |
| /*get data size of task start here */ |
| final EList<Task> modelTask = this.model.getSwModel().getTasks(); |
| final List<Task> gpuTask = modelTask.stream().filter(a -> a.getStimuli().get(0) instanceof InterProcessStimulus).collect(Collectors.toList()); |
| |
| /*if task is mapped to GPU, use the label within hashmap */ |
| if (gpuTask.contains(task)) { |
| final List<SetEvent> shottingTaskEvent = SoftwareUtil.collectSetEvents(task, null); |
| final Process shottingTask = shottingTaskEvent.get(0).getProcess(); |
| /*checking whether shotting task (triggering task) is flagged or not |
| * if it is flagged, then the current GPU task is on CPU, hence using label in hashmap |
| * if it is not, then the current GPU task is on GPU, therefore no need to look further more*/ |
| final int shotIndex = this.model.getSwModel().getTasks().indexOf(shottingTask); |
| |
| if (this.flagArray[shotIndex] == 1) { |
| final List<Label> readList = this.gpuLabels.get(task)[0]; |
| final List<Label> writeList = this.gpuLabels.get(task)[1]; |
| for (final Label l : readList) { |
| |
| sumCycle += (long) Math.ceil(l.getSize().getNumberBytes() / 64.0); |
| |
| } |
| |
| for (final Label l : writeList) { |
| sumCycle += (long) Math.ceil(l.getSize().getNumberBytes() / 64.0); |
| } |
| } |
| |
| } |
| |
| /*normal calculation of total label within task for contention calculation |
| * this is what we use most of the time*/ |
| else { |
| for (final Runnable r : SoftwareUtil.getRunnableList(t, null)) { |
| for (final Label rl : SoftwareUtil.getReadLabelSet(r, null)) { |
| sumCycle += (long) Math.ceil(rl.getSize().getNumberBytes() / 64.0); |
| } |
| |
| for (final Label wl : SoftwareUtil.getWriteLabelSet(r, null)) { |
| sumCycle += (long) Math.ceil(wl.getSize().getNumberBytes() / 64.0); |
| } |
| } |
| |
| } |
| |
| |
| /* No core = no interference => No contention*/ |
| if (NoOfCore == 0) { |
| Lat = 0; |
| } |
| |
| /* task's contention in picosecond */ |
| // |
| double taskLat = 0.0; |
| taskLat = Lat * sumCycle; |
| |
| time.setValue(BigInteger.valueOf((long) taskLat * 1000l)); |
| /*Clear out the Array for next task, print console before this if want to check */ |
| b1.clear(); |
| b2.clear(); |
| c1.clear(); |
| c2.clear(); |
| d2.clear(); |
| c3.clear(); |
| } |
| |
| } |
| return time; |
| } |
| // contentionForTask function end here |
| |
| |
| /** |
| * Contention for GPU CopyEngine only, when copyengine perform copyIn |
| * and copyOut. Mostly same with contention for CPU task, |
| * except we only considering written label when copyOut |
| * (since no need to copy back read label) |
| * @param task |
| * @return time |
| */ |
| |
| public Time contentionForGPUCE(final Task task) { |
| setUpFlagArrayAndHashMap(this.ia); |
| final Time contentionTime = FactoryUtil.createTime(BigInteger.ZERO, TimeUnit.PS); |
| /** |
| * Constant and variable init, change those constant for different model number |
| * |
| * The arrayList below are used to find number of core interfering and also for easier debugging |
| * b1 = current task's name |
| * b2 = intefering task's name |
| * c1 = current task's core |
| * c2 = intefering task's core |
| * c3 = core definition's name (to check is task PU to apply the appropiate calculation) |
| * d2 = c2 but reduced duplication |
| */ |
| |
| final ArrayList<String> b1 = new ArrayList<>(); |
| final ArrayList<String> b2 = new ArrayList<>(); |
| final ArrayList<String> c1 = new ArrayList<>(); |
| final ArrayList<String> c2 = new ArrayList<>(); |
| final ArrayList<String> c3 = new ArrayList<>(); |
| ArrayList<String> d2 = new ArrayList<>(); |
| |
| /* name of GPU core, use for identification task's core while calculating */ |
| final String GPU = "GPU"; |
| /* name of CPU core, use for identification task's core while calculating |
| * can't use the identifyPU type above to distinguished because we have 2 different CPU in this model*/ |
| final String A57CPU = "A57"; |
| final String DenverCPU = "Denver"; |
| |
| /* time taken to read or write a cacheLine (64B)*/ |
| final double A57_Baseline = 20.0; |
| final double Denver_Baseline = 8.0; |
| final double GPU_Baseline = 3.0; |
| /*increase in latency operated by a single interfering core. |
| * Do note: it does not matter if the interfering core is Denver or A57 |
| * : this number only depends on the observed CPU core (CPUtype)*/ |
| |
| final double A57_KConstant = 20.0; |
| final double Denver_KConstant = 2.0; |
| |
| /* sensibility to GPU CE activity.*/ |
| final double A57_sGPU = 100.0; |
| final double Denver_sGPU = 20.0; |
| |
| /*bGPU = boolean, 1: if GPU is operating the copy engine, 0 other wise |
| * We are assuming WCET so this will always be 1 in this implementation*/ |
| final double bGPU = 1.0; |
| |
| /*Computation to get NUMBER OF CORE intefering start*/ |
| final List<ProcessingUnit> pul = CommonUtils.getPUs(this.model); |
| final EList<Task> modelTask = this.model.getSwModel().getTasks(); |
| // get gpu task start |
| final List<Task> gpuTask = modelTask.stream().filter(a -> a.getStimuli().get(0) instanceof InterProcessStimulus).collect(Collectors.toList()); |
| // get gpu task finished |
| for (final Task t : gpuTask) { |
| if (t.equals(task)) { |
| for (final Label l : CommonUtils.getAccessedLabelSet(t)) { |
| |
| final Set<Runnable> wL = SoftwareUtil.getWriterSetOfLabel(l, null); |
| for (final Runnable r : wL) { |
| final List<Process> lT = SoftwareUtil.getProcesses(r, null); |
| for (final Process lTT : lT) { |
| |
| for (int taskIndex = 0; taskIndex < this.ia.length; taskIndex++) { |
| final ProcessingUnit pup = pul.get(this.ia[taskIndex]); |
| final String taskName = this.model.getSwModel().getTasks().get(taskIndex).getName(); |
| final String coreName = pup.getName(); |
| /* get related task through checking which runnable are accessing the same label |
| * put the task that contain those runnable into b2 and the core of that task in c2 |
| */ |
| |
| if (t.getName().equals(taskName)) { |
| b1.add(taskName); |
| c1.add(coreName); |
| c3.add(pup.getDefinition().getName()); |
| } |
| /*only put that task in our array if task isn't flagged (check for |
| * unnecessary PRE_POST task, we assume those task are gone when |
| * corresponded GPU task is mapped to CPU)*/ |
| |
| final int b2Index = this.model.getSwModel().getTasks().indexOf(lTT); |
| if (lTT.getName().equals(taskName) && (this.flagArray[b2Index] == 0)) { |
| b2.add(taskName); |
| c2.add(coreName); |
| } |
| } |
| } |
| } |
| final Set<Runnable> rL = SoftwareUtil.getReadersSetOfLabel(l, null); |
| for (final Runnable r : rL) { |
| final List<Process> lT1 = SoftwareUtil.getProcesses(r, null); |
| for (final Process lTT1 : lT1) { |
| for (int taskIndex = 0; taskIndex < this.ia.length; taskIndex++) { |
| final ProcessingUnit pup = pul.get(this.ia[taskIndex]); |
| final String taskName = this.model.getSwModel().getTasks().get(taskIndex).getName(); |
| final String coreName = pup.getName(); |
| /* get related task through checking which runnable are accessing the same label |
| * put the task that contain those runnable into b2 and the core of that task in c2 |
| */ |
| if (t.getName().equals(taskName)) { |
| b1.add(taskName); |
| c1.add(coreName); |
| c3.add(pup.getDefinition().getName()); |
| |
| } |
| |
| /*only put that task in our array if task isn't flagged (check for |
| * unnecessary PRE_POST task, we assume those task are gone when |
| * corresponded GPU task is mapped to CPU)*/ |
| final int b2Index1 = this.model.getSwModel().getTasks().indexOf(lTT1); |
| if (lTT1.getName().equals(taskName) && (this.flagArray[b2Index1] == 0)) { |
| b2.add(taskName); |
| c2.add(coreName); |
| } |
| } |
| } |
| } |
| |
| } |
| // Remove all the duplicates inside c2. |
| d2 = (ArrayList<String>) c2.stream().distinct().collect(Collectors.toList()); |
| |
| /* Hashmap of <Core, Integer> |
| * Assign value for other task as 1, GPU task and observed task as 0 |
| * total of value in integer will be equal to number of core intefering |
| * Output of hashMap look like this {Core3=1, Core4=1, Core0=0, GP10B=0}*/ |
| final HashMap<Object, Integer> coreHashMap = new HashMap<Object, Integer>(); |
| for (final ProcessingUnit pu : pul) { |
| final String puName = pu.getName(); |
| for (final String cName : d2) { |
| if (cName.equals(puName)) { |
| if (identifyPUType(pu)) { |
| coreHashMap.put(puName, 1); |
| if (puName.equals(c1.get(0))) { |
| coreHashMap.put(puName, 0); |
| } |
| } |
| else { |
| coreHashMap.put(puName, 0); |
| } |
| } |
| } |
| } |
| int NoOfCore = 0; |
| for (final int val : coreHashMap.values()) { |
| NoOfCore += val; |
| } |
| /*Memory contention calculation for CPU |
| * Lat = baseline + K*C + sGPU*bGPU |
| * Lat = time necessary to read/write 64B ( nanosecond ) |
| * 64B I repeat. Scale the Lat accordingly to the memory footprint size |
| * NoOfCore = C in the formula above. |
| */ |
| double Lat = 0.0; |
| if (!c3.isEmpty()) { |
| if (c3.get(0).contains(A57CPU)) { |
| Lat = A57_Baseline + (A57_KConstant * NoOfCore) + (A57_sGPU * bGPU); |
| } |
| if (c3.get(0).contains(DenverCPU)) { |
| Lat = Denver_Baseline + (Denver_KConstant * NoOfCore) + (Denver_sGPU * bGPU); |
| } |
| /*Memory contention calculation for GPU |
| * Lat = GPU_Baseline + 0.5*C |
| * Lat = time necessary to transfer 64B with copy engine ( nanosecond ) |
| * 64B I repeat. Scale the Lat accordingly to the memory footprint size |
| * NoOfCore = C in the formula above. */ |
| |
| if (c3.get(0).contains(GPU)) { |
| Lat = GPU_Baseline + 0.5 * NoOfCore; |
| } |
| } |
| /* Latency for 64B calculation end here*/ |
| long sumCycle = 0; |
| /* Full read and write labels for CE when copyIN */ |
| final Set<Label> fullList = SoftwareUtil.getAccessedLabelSet(t, null); |
| for (final Label l : fullList) { |
| final long fullSize = l.getSize().getNumberBytes(); |
| final long cycle = (long) Math.ceil(fullSize / 64.0); |
| sumCycle += cycle; |
| } |
| |
| |
| /* Written labels only for CE when CopyOut*/ |
| final Set<Label> writeList = SoftwareUtil.getWriteLabelSet(t, null); |
| for (final Label l : writeList) { |
| final long fullSize = l.getSize().getNumberBytes(); |
| final long cycle = (long) Math.ceil(fullSize / 64.0); |
| sumCycle += cycle; |
| } |
| |
| |
| // No core = no interference => No contention |
| if (NoOfCore == 0) { |
| Lat = 0; |
| } |
| double taskLat = 0.0; |
| taskLat = Lat * sumCycle; |
| |
| contentionTime.setValue(BigInteger.valueOf((long) taskLat * 1000l)); |
| |
| /*Clear out the Array for next task, print console before this if want to check */ |
| b1.clear(); |
| b2.clear(); |
| c1.clear(); |
| c2.clear(); |
| d2.clear(); |
| c3.clear(); |
| } |
| |
| } |
| // continue |
| |
| return contentionTime; |
| } |
| } |