/**
 ********************************************************************************
 * Copyright (c) 2021 Robert Bosch GmbH.
 * 
 * This program and the accompanying materials are made
 * available under the terms of the Eclipse Public License 2.0
 * which is available at https://www.eclipse.org/legal/epl-2.0/
 * 
 * SPDX-License-Identifier: EPL-2.0
 * 
 * Contributors:
 *     Robert Bosch GmbH - initial API and implementation
 ********************************************************************************
 */

package org.eclipse.app4mc.slg.commons.m2t.transformers.sw;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.eclipse.app4mc.amalthea.model.Label;
import org.eclipse.app4mc.slg.commons.m2t.generators.LabelGenerator;
import org.eclipse.app4mc.slg.commons.m2t.transformers.SLGBaseTransformer;
import org.eclipse.app4mc.slg.commons.m2t.transformers.SLGTranslationUnit;
import org.eclipse.app4mc.transformation.util.OutputBuffer;

import com.google.inject.Inject;
import com.google.inject.Singleton;

/**
 * Transformer for Amalthea {@link Label} elements.
 *
 * <p>For every label a {@link SLGTranslationUnit} located at
 * {@link #BASE_PATH}/{@link #MODULE_NAME} is created (and cached). On first use
 * the source file of that unit receives the generic {@code read_label} /
 * {@code write_label} C/C++ functions and the header file receives the matching
 * prototypes; afterwards the header text produced by
 * {@link LabelGenerator#toH(Label)} is appended for each transformed label.
 */
@Singleton
public class LabelTransformer extends SLGBaseTransformer {

	/** Library name handed to {@link LabelGenerator#toCMake} by {@link #createCMake()}. */
	public static final String LIB_NAME = "LABELS_LIB";
	/** Base path used for every label translation unit. */
	public static final String BASE_PATH = "synthetic_gen";
	/** Module name used for every label translation unit. */
	public static final String MODULE_NAME = "labels";
	/** {@link #BASE_PATH} and {@link #MODULE_NAME} joined by a slash. */
	public static final String MODULE_PATH = BASE_PATH + "/" + MODULE_NAME;
	/** Target file of {@link #createCMake()}. */
	public static final String MAKEFILE_PATH = MODULE_PATH + "/CMakeLists.txt";

	@Inject private OutputBuffer outputBuffer;

	// ---------- generic part "def create new transform(...)" ----------

	/** Translation units created so far, keyed by the one-element list holding the label. */
	private final Map<List<Object>, SLGTranslationUnit> transformCache = new HashMap<>();

	/**
	 * @return the cache map itself (not a copy)
	 */
	@Override
	public Map<List<Object>, SLGTranslationUnit> getCache() {
		return this.transformCache;
	}

	/**
	 * Returns the translation unit belonging to the given label.
	 *
	 * <p>A unit that is already cached is returned as is. Otherwise a new unit is
	 * created and stored in the cache; both steps happen while synchronizing on the
	 * cache map. File generation for a new unit runs after the synchronized section
	 * and only if the unit reports {@code isValid()}.
	 *
	 * @param label the label to transform (may be {@code null}, see
	 *              {@link #createTranslationUnit(Label)})
	 * @return the cached or newly created translation unit
	 */
	public SLGTranslationUnit transform(final Label label) {
		final List<Object> cacheKey = new ArrayList<>(Arrays.asList(label));
		final SLGTranslationUnit freshUnit;

		synchronized (transformCache) {
			if (!transformCache.containsKey(cacheKey)) {
				freshUnit = createTranslationUnit(label);
				transformCache.put(cacheKey, freshUnit);
			} else {
				// known label -> nothing to generate
				return transformCache.get(cacheKey);
			}
		}

		// only reached for a newly created unit; files are created if it is valid
		if (freshUnit.isValid()) {
			doTransform(freshUnit, label);
		}

		return freshUnit;
	}

	// ---------------------------------------------------

	/**
	 * Creates the translation unit for a label.
	 *
	 * @param label the label, or {@code null}
	 * @return a unit built from the single text "UNSPECIFIED LABEL" when
	 *         {@code label} is {@code null}; otherwise a unit built from
	 *         {@link #BASE_PATH}, {@link #MODULE_NAME} and the label name
	 */
	protected SLGTranslationUnit createTranslationUnit(final Label label) {
		if (label == null) {
			return new SLGTranslationUnit("UNSPECIFIED LABEL");
		}
		return new SLGTranslationUnit(BASE_PATH, MODULE_NAME, label.getName());
	}

	/**
	 * Performs the transformation of a newly created unit; delegates to
	 * {@link #genFiles(SLGTranslationUnit, Label)}.
	 */
	protected void doTransform(final SLGTranslationUnit tu, final Label label) {
		genFiles(tu, label);
	}

	/**
	 * Appends the generated text for one label.
	 *
	 * <ul>
	 * <li>If the source file of the unit is still empty: an {@code #include} of the
	 * unit's header file followed by the output of {@link #toCpp(SLGTranslationUnit)}.</li>
	 * <li>If the header file of the unit is still empty: the {@code <stdint.h>}
	 * include and the prototypes of {@code read_label} and {@code write_label}.</li>
	 * <li>Always: the header text of the label ({@link LabelGenerator#toH(Label)}).</li>
	 * </ul>
	 *
	 * @param tu    translation unit whose files are extended
	 * @param label label to generate the header text for
	 */
	protected void genFiles(SLGTranslationUnit tu, Label label) {
		final boolean srcIsEmpty = isSrcFileEmpty(tu);
		if (srcIsEmpty) {
			srcAppend(tu, "#include \"" + getIncFile(tu) + "\"\n\n");
			toCpp(tu);
		}

		final boolean incIsEmpty = isIncFileEmpty(tu);
		if (incIsEmpty) {	// Mc: ???
			incAppend(tu, "#include <stdint.h>\n\n");
			incAppend(tu, "uint64_t read_label(uint8_t *p, int size);\n");
			incAppend(tu, "uint64_t write_label(uint8_t *p, int size);\n\n");
		}

		incAppend(tu, LabelGenerator.toH(label));
		// NOTE: appending the per-label source text is currently disabled:
		//srcAppend(tu, LabelGenerator.toCpp(label));
	}

	// When compiled with -O2, 'read_label' main loop is like this, mostly 8 * 64bits memory reads per iteration
	//
	//.L2:
	//
	//	add    rcx,rdi
	//	cmp    rdi,r8
	//	jae    4c <read_label(unsigned char*, int)+0x4c>
	//	mov    rdx,QWORD PTR [rdi+0x8]
	//	add    rdi,0x40
	//	add    rdx,QWORD PTR [rdi-0x40]
	//	add    rdx,QWORD PTR [rdi-0x30]
	//	add    rdx,QWORD PTR [rdi-0x28]
	//	add    rdx,QWORD PTR [rdi-0x20]
	//	add    rdx,QWORD PTR [rdi-0x18]
	//	add    rdx,QWORD PTR [rdi-0x10]
	//	add    rdx,QWORD PTR [rdi-0x8]
	//	add    rax,rdx
	//	cmp    r8,rdi
	//	ja     20 <read_label(unsigned char*, int)+0x20>
	//....
	//
	//
	// 'write_label' main loop is like this, mostly 8 * 64bits memory writes per iteration
	//
	//  	cmp    rdi,rcx
	//  	jae    12e <write_label(unsigned char*, int)+0x4e>
	//  	xchg   ax,ax
	//  	mov    esi,0xaffffffe
	//  	add    rdi,0x40
	//  	mov    QWORD PTR [rdi-0x40],rsi
	//  	mov    QWORD PTR [rdi-0x38],rsi
	//  	mov    QWORD PTR [rdi-0x30],rsi
	//  	mov    QWORD PTR [rdi-0x28],rsi
	//  	mov    QWORD PTR [rdi-0x20],rsi
	//  	mov    QWORD PTR [rdi-0x18],rsi
	//  	mov    QWORD PTR [rdi-0x10],rsi
	//  	mov    QWORD PTR [rdi-0x8],rsi
	//  	cmp    rcx,rdi
	//  	ja     100 <write_label(unsigned char*, int)+0x20>
	//  	cmp    rax,rdx
	// 		...

	/**
	 * Text of the generated {@code read_label} function, one entry per appended
	 * chunk. The generated function adds up the buffer in steps of eight 64-bit
	 * words, then the remaining bytes one at a time, and returns the sum.
	 */
	private static final String[] READ_LABEL_LINES = {
		"uint64_t read_label(uint8_t *p, int size){\n",
		"  register uint64_t *p_start= (uint64_t*)p;\n",
		"  // 64 is the number of bytes moved inside the 1st while loop\n",
		"  uint64_t left_over = size & (64-1);\n",
		"  uint64_t multiple_of_64 = size - left_over;\n",
		"  register uint64_t *p_end_multiple_of_64= (uint64_t*)&(p[multiple_of_64-1]);\n",
		"  // pointers to the 2nd loop\n",
		"  register uint8_t *p_start2= &(p[multiple_of_64]);\n",
		"  register uint8_t *p_end= &(p[size-1]);\n",
		"  register uint64_t val=0;\n",
		"  // executes 8 moves of 8 bytes each\n",
		"  while(p_start<p_end_multiple_of_64){\n",
		"	val += p_start[0] + p_start[1] + p_start[2] + p_start[3] + \n",
		"		   p_start[4] + p_start[5] + p_start[6] + p_start[7];\n",
		"	p_start += 8;\n",
		"  }\n",
		"  // executes the remaining moves, byte by byte\n",
		"  while(p_start2<=p_end){\n",
		"	val += *p_start2;\n",
		"	p_start2 ++;\n",
		"  }\n",
		"  return val;\n",
		"}\n\n"
	};

	/**
	 * Text of the generated {@code write_label} function, one entry per appended
	 * chunk. The generated function stores {@code 0xAFFFFFFE} into the buffer in
	 * steps of eight 64-bit words, then {@code 0xAF} into the remaining bytes, and
	 * returns {@code val}, which is never modified after being set to 0.
	 */
	private static final String[] WRITE_LABEL_LINES = {
		"uint64_t write_label(uint8_t *p, int size){\n",
		"    register uint64_t *p_start= (uint64_t*)p;\n",
		"    // 64 is the number of bytes moved inside the 1st while loop\n",
		"    uint64_t left_over = size & (64-1);\n",
		"    uint64_t multiple_of_64 = size - left_over;\n",
		"    register uint64_t *p_end_multiple_of_64= (uint64_t*)&(p[multiple_of_64-1]);\n",
		"    // pointers to the 2nd loop\n",
		"    register uint8_t *p_start2= &(p[multiple_of_64]);\n",
		"    register uint8_t *p_end= &(p[size-1]);\n",
		"    register uint64_t val=0;\n",
		"    // executes 8 moves of 8 bytes each\n",
		"    while(p_start<p_end_multiple_of_64){\n",
		"      p_start[0] = 0xAFFFFFFE;\n",
		"      p_start[1] = 0xAFFFFFFE;\n",
		"      p_start[2] = 0xAFFFFFFE;\n",
		"      p_start[3] = 0xAFFFFFFE;\n",
		"      p_start[4] = 0xAFFFFFFE;\n",
		"      p_start[5] = 0xAFFFFFFE;\n",
		"      p_start[6] = 0xAFFFFFFE;\n",
		"      p_start[7] = 0xAFFFFFFE;\n",
		"      p_start +=8;\n",
		"    }\n",
		"    // executes the remaining moves, byte by byte\n",
		"    while(p_start2<=p_end){\n",
		"      *(p_start2)   = 0xAF;\n",
		"      p_start2 ++;\n",
		"    }\n",
		"    return val;\n",
		"}\n\n"
	};

	/**
	 * Appends the definitions of {@code read_label} and {@code write_label} (in
	 * this order) to the source file of the given translation unit.
	 *
	 * @param tu translation unit whose source file is extended
	 */
	protected void toCpp(SLGTranslationUnit tu) {
		appendSourceLines(tu, READ_LABEL_LINES);
		appendSourceLines(tu, WRITE_LABEL_LINES);
	}

	/** Passes every entry of {@code lines}, in order, to a separate {@code srcAppend} call. */
	private void appendSourceLines(final SLGTranslationUnit tu, final String[] lines) {
		for (final String line : lines) {
			srcAppend(tu, line);
		}
	}

	/**
	 * Appends the CMake text generated for {@link #LIB_NAME} and the current source
	 * files to {@link #MAKEFILE_PATH} of the output buffer (type "OTHER").
	 *
	 * @return the result reported by the output buffer
	 */
	public boolean createCMake() {
		return outputBuffer.appendTo(
				"OTHER",
				MAKEFILE_PATH,
				LabelGenerator.toCMake(LIB_NAME, getSrcFiles()));
	}

}
