package iptgxdb.converter;

import java.awt.Color;
import java.io.File;
import java.util.Map;

import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Maps;
import com.google.common.collect.Multiset;

import iptgxdb.utils.FastaReader;
import iptgxdb.utils.GenomeFeature;
import iptgxdb.utils.GenomeLocation;
import iptgxdb.utils.GenomeLocation.Strand;
import iptgxdb.utils.GenomicsUtil;
import iptgxdb.utils.UOBufferedWriter;


/**
 * Base class for converters that parse one or more annotation input files and
 * write the contained genome features as a GFF track. Optionally a
 * tab-separated table is written as well, containing each feature together
 * with its sequence and its up-/downstream flanking sequences.
 *
 * <p>Subclasses implement {@link #convert_internal(File)} and hand every
 * parsed feature to {@link #addGenomeFeature(GenomeFeature)}.
 */
public abstract class AConverter {
	/** Input files of the current run, processed in array order. */
	File[] fIn;
	/** GFF output file of the current run. */
	File fOut;
	/** Number of features written to the GFF output during the current run. */
	int countFeatures;
	/** Writer for the GFF output. */
	UOBufferedWriter out;
	/** Writer for the optional table output; {@code null} if no table was requested. */
	UOBufferedWriter outTab;
	/** Sequence identifier passed to {@link #convert}; kept for use by subclasses. */
	String seqId;
	/** Name of the input file that is currently being parsed. */
	String source;
	/** If {@code true}, each feature is colored by its type instead of using one track color. */
	boolean extensive;
	/** Table output file of the current run, may be {@code null}. */
	File fOutTab;
	/** Sequences read from the FASTA file, keyed by FASTA header; {@code null} if none was given. */
	Map<String, StringBuilder> seqs;
	/** All feature IDs handed out so far in the current run, used to keep IDs unique. */
	Multiset<String> idsUsed = HashMultiset.create();
	
	/** Length (nt) of the up-/downstream regions reported in the table output. */
	final int streamExtension = 200;
	/** Fixed number of nucleotides added to the signed end of every feature. */
	int featureExtension = 0;
	/** If {@code true}, features are extended automatically up to an adjacent stop codon. */
	boolean autoFeatureExtension = false;
	
	/**
	 * @return the color used for the whole track when the converter is not
	 *         run in extensive (per-feature coloring) mode
	 */
	protected abstract Color getColor();
	
	/**
	 * Maps a feature type to its display color: blue for CDS, red for the
	 * known RNA types, green for genes and dark gray for anything else.
	 *
	 * @param featureType feature type, compared case-insensitively;
	 *        {@code null} is treated like an unknown type
	 * @return the color for the given type, never {@code null}
	 */
	public static Color getColor(String featureType) {
		// literal-first comparison keeps a null type from raising a NullPointerException
		if ("CDS".equalsIgnoreCase(featureType))
			return Color.BLUE;
		else if ("rRNA".equalsIgnoreCase(featureType) 
				|| "mRNA".equalsIgnoreCase(featureType)
				|| "tRNA".equalsIgnoreCase(featureType)
				|| "ncRNA".equalsIgnoreCase(featureType)
				|| "miscRNA".equalsIgnoreCase(featureType)
				|| "tmRNA".equalsIgnoreCase(featureType))
			return Color.RED;
		else if ("gene".equalsIgnoreCase(featureType))
			return Color.GREEN;
		
		return Color.GRAY.darker();
	}
	
	/**
	 * Converts all input files into a single GFF file (and optionally a table).
	 * All per-run state of this converter is reset at the beginning, so the
	 * same instance can be used for several conversions.
	 *
	 * @param seqId sequence identifier, stored for use by subclasses
	 * @param fIn input files, at least one; parsed in the given order
	 * @param fOut GFF output file
	 * @param fOutTab table output file, or {@code null} to skip the table
	 * @param fSeq FASTA file with the genome sequence(s), or {@code null}
	 * @param extensive if {@code true}, features are colored by type
	 * @param featureExtension {@code null} for no extension, {@code "auto"} for
	 *        automatic extension to an adjacent stop codon, otherwise the
	 *        number of nucleotides to extend each feature by
	 * @throws IllegalArgumentException if no input file is given
	 * @throws NumberFormatException if {@code featureExtension} is neither
	 *         {@code null}, {@code "auto"} nor an integer
	 * @throws Exception if reading the input or writing the output fails
	 */
	public void convert(String seqId, File[] fIn, File fOut, File fOutTab, File fSeq, boolean extensive, String featureExtension) throws Exception {
		// validate before any output file is opened
		if (fIn == null || fIn.length == 0)
			throw new IllegalArgumentException("At least one input file is required.");
		
		this.seqId = seqId;
		this.fIn = fIn;
		this.fOut = fOut;
		this.fOutTab = fOutTab;
		this.extensive = extensive;
		
		// reset both extension settings so that nothing leaks over from a previous run
		this.featureExtension = 0;
		this.autoFeatureExtension = false;
		if (featureExtension != null) {
			if (featureExtension.equals("auto"))
				this.autoFeatureExtension = true;
			else
				this.featureExtension = Integer.parseInt(featureExtension);
		}
		
		this.seqs = null;
		if (fSeq != null) {
			// read in sequences
			Map<String,String> fasta = FastaReader.readFile(fSeq, FastaReader.headerComplete);
			Map<String, StringBuilder> lazyView = Maps.transformValues(fasta, 
				new Function<String, StringBuilder>() {
					public StringBuilder apply(String value) {
						return new StringBuilder(value);
					}
				}
			);
			// transformValues only returns a view that re-applies the function on
			// every access; copy it once so that the (potentially genome-sized)
			// StringBuilder is not rebuilt for each feature lookup
			this.seqs = Maps.newLinkedHashMap(lazyView);
		}
		
		// IDs only have to be unique within one output file
		idsUsed.clear();
		countFeatures = 0;
		out = null;
		outTab = null;
		
		try {
			out = new UOBufferedWriter(fOut);
			String trackName = fIn[0].getName() + (fIn.length>1 ? ".."+fIn[fIn.length-1].getName() : "");
			out.writeLine(GenomicsUtil.createGFFheader(trackName, (extensive ? null : getColor()) ));
			
			if (fOutTab != null) {
				outTab = new UOBufferedWriter(fOutTab);
				outTab.writeTsvLine("id","type","pseudo","chromosome","from","to","strand","attributes","sequence", streamExtension+"nt upstream", streamExtension+"nt downstream", "protein sequence");
			}
			
			for (File inputFile : fIn) {
				this.source = inputFile.getName();
				
				System.out.println("INFO: Parsing '"+inputFile.getName()+"'...");
				convert_internal(inputFile);
			}
		} finally {
			// close both writers even if parsing or writing failed
			try {
				if (out != null)
					out.close();
			} finally {
				if (outTab != null)
					outTab.close();
			}
		}
		
		System.out.println("INFO: Parsing done! Extracted "+countFeatures+" features to '"+fOut.getName()+"'.");
	}
	
	/**
	 * Parses a single input file and reports every feature found through
	 * {@link #addGenomeFeature(GenomeFeature)}.
	 *
	 * @param inputFile the file to parse
	 * @throws Exception if the file cannot be read or parsed
	 */
	protected abstract void convert_internal(File inputFile) throws Exception;
	
	
	/**
	 * Post-processes a parsed feature and writes it to the output(s): applies
	 * the type color (extensive mode) and the fixed feature extension, makes
	 * the ID unique, checks for an adjacent stop codon if sequences are
	 * available, writes the table line if requested and finally the GFF entry.
	 *
	 * @param gf the feature to add; its color, location and ID may be modified
	 * @throws Exception if writing to one of the outputs fails
	 */
	protected void addGenomeFeature(GenomeFeature gf) throws Exception {
		if (extensive) {
			gf.setColor(getColor(gf.type));
		}
		if (featureExtension != 0) {
			gf.location = new GenomeLocation(gf.location.getSignedStart(), gf.location.getSignedEnd()+featureExtension, gf.location.chromosome);
		}
		makeIdUnique(gf);

		if (seqs != null) {
			// get the proper sequence -- if there is only one, do not care about identifier matching
			// NOTE(review): seqForward is null if several sequences are present and none matches gf.seqId
			StringBuilder seqForward = (seqs.size() == 1) ? (seqs.values().iterator().next()) : (seqs.get(gf.seqId));
			checkStopCodon(gf, seqForward);
			
			// output table if requested
			if (outTab != null) {
				writeTableLine(gf, seqForward);
			}
		}

		out.writeLine(gf.toGFFentry());
		countFeatures++;
	}
	
	/** Appends a numeric suffix to the feature ID if the ID was already used; null IDs are left alone. */
	private void makeIdUnique(GenomeFeature gf) {
		String id = gf.getID();
		if (id == null)
			return;
		
		idsUsed.add(id);
		int occurrence = idsUsed.count(id);
		if (occurrence > 1) {
			String candidate = id+"-"+occurrence;
			// skip suffixes that are already taken, e.g. by a feature literally named "<id>-2"
			while (idsUsed.contains(candidate)) {
				occurrence++;
				candidate = id+"-"+occurrence;
			}
			// remember the generated ID so that a later feature cannot reuse it
			idsUsed.add(candidate);
			gf.setID(candidate);
		}
	}
	
	/**
	 * Checks whether the feature ends with, or is directly followed by, a stop
	 * codon. Depending on {@link #autoFeatureExtension} the feature is either
	 * extended to include that stop codon or an INFO message is printed.
	 */
	private void checkStopCodon(GenomeFeature gf, StringBuilder seqForward) {
		if (gf.location.length()%3 == 0) {
			// stops in frame -> look for stop-codon
			GenomeLocation lastCodon = new GenomeLocation(gf.location.getSignedEnd()-2, gf.location.getSignedEnd(), gf.location.chromosome);
			try {
				GenomeLocation nextCodon = new GenomeLocation(gf.location.getSignedEnd()+1, gf.location.getSignedEnd()+3, gf.location.chromosome);
				if (GenomicsUtil.translate(lastCodon.getSequence(seqForward)).toString().equals("*")) {
					// already ends with a stop codon, nothing to do
				} else if (GenomicsUtil.translate(nextCodon.getSequence(seqForward)).toString().equals("*")) {
					// next codon is a stop-codon
					if (autoFeatureExtension) {	// extend genome feature
						gf.location = new GenomeLocation(gf.location.getSignedStart(), gf.location.getSignedEnd()+3, gf.location.chromosome);
					} else {
						System.out.println("INFO: 3nt extension of "+gf.getID()+" would result in a stop-codon terminated sequence.");
					}
				}
			} catch (Exception ignored) {
				// deliberate best effort: extension is not possible (e.g. at the
				// sequence boundary), so the feature is kept unchanged
			}
		} else {
			// stops out-of-frame -- check for stop codon upon full frame extension
			int lastFullFrameCodon = gf.location.getSignedEnd() - gf.location.length()%3 + 1;
			try {
				GenomeLocation extensionCodon = new GenomeLocation(lastFullFrameCodon, lastFullFrameCodon+2, gf.location.chromosome);
				if (GenomicsUtil.translate(extensionCodon.getSequence(seqForward)).toString().equals("*")) {
					// full frame extension gives stop-codon
					if (autoFeatureExtension) {	// extend genome feature
						gf.location = new GenomeLocation(gf.location.getSignedStart(), lastFullFrameCodon+2, gf.location.chromosome);
					} else {
						System.out.println("INFO: full-frame extension of "+gf.getID()+" would result in a stop-codon terminated sequence.");
					}
				}
			} catch (Exception ignored) {
				// deliberate best effort: extension is not possible, keep the feature unchanged
			}
		}
	}
	
	/**
	 * Writes the table line of a feature, including its sequence and the
	 * flanking regions of {@link #streamExtension} nt. For features that are
	 * neither on the plus nor on the minus strand the flanking columns are
	 * left empty, as up- and downstream are undefined there.
	 */
	private void writeTableLine(GenomeFeature gf, StringBuilder seqForward) throws Exception {
		GenomeLocation upstream = null;
		GenomeLocation downstream = null;
		if (gf.location.strand==Strand.PLUS) {
			upstream = new GenomeLocation(gf.location.from-streamExtension, gf.location.from-1, gf.location.strand, gf.location.chromosome);
			downstream = new GenomeLocation(gf.location.to+1, gf.location.to+streamExtension, gf.location.strand, gf.location.chromosome);
		} else if (gf.location.strand==Strand.MINUS) {
			// on the minus strand the region with the higher coordinates lies upstream
			upstream = new GenomeLocation(gf.location.to+1, gf.location.to+streamExtension, gf.location.strand, gf.location.chromosome);
			downstream = new GenomeLocation(gf.location.from-streamExtension, gf.location.from-1, gf.location.strand, gf.location.chromosome);
		}
		
		Object featureSeq = gf.location.getSequence(seqForward);
		Object upstreamSeq = "";
		if (upstream != null)
			upstreamSeq = upstream.getSequence(seqForward);
		Object downstreamSeq = "";
		if (downstream != null)
			downstreamSeq = downstream.getSequence(seqForward);
		
		outTab.writeTsvLine(Objects.firstNonNull(gf.getID(),""), gf.type, gf.getAtt("pseudo",""), gf.seqId, gf.location.from, gf.location.to, gf.location.strand, gf.atts.toString(true), featureSeq, upstreamSeq, downstreamSeq, gf.toFastaEntry(seqForward, true, false));
	}
}
