package iptgxdb.utils;


import java.awt.Color;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.commons.lang3.StringUtils;

import iptgxdb.utils.GenomeFeature.IdManipulator;

/**
 * GenomeFeatureSet is a LinkedHashSet of GenomeFeatures plus
 * some additional properties. It resembles a complete annotation
 * file stored in the GFF3 file format.
 * 
 * @author Ulrich Omasits
 * @date 18.11.2011
 */
public class GenomeFeatureSet extends LinkedHashSet<GenomeFeature> {
	private static final long serialVersionUID = 5035865359437800455L;
	
	/**
	 * Location-specific view on the feature set. Holds the first feature that was
	 * added for a given location; later features with an equal location are still
	 * members of the set but are not entered into this view.
	 */
	public Map<GenomeLocation, GenomeFeature> byLocation = new HashMap<GenomeLocation, GenomeFeature>();
	
	/**
	 * Id-based view on the feature set, keyed by {@code GenomeFeature.getID()}.
	 * A later feature with the same id replaces the earlier mapping.
	 */
	public Map<String, GenomeFeature> byId = new HashMap<String, GenomeFeature>();
	
	/**
	 * End-position-specific, sorted view on the feature set. The key is a location
	 * whose start and end both equal the signed end of the feature location (on the
	 * same chromosome); the value holds the features ending there, ordered by
	 * {@code GenomeFeature.comparatorLength}. Only features that were entered into
	 * {@link #byLocation} are indexed here.
	 */
	public SortedMap<GenomeLocation, SortedSet<GenomeFeature>> byEndPosition = new TreeMap<GenomeLocation, SortedSet<GenomeFeature>>();
	
	/** The type of annotation source; may be {@code null} (see constructor). */
	public AnnotationSource annotationSource;
	
	/** Not used within the class, just for reference. */
	public String name;

	/**
	 * Full constructor with file and annotation source.
	 * <p>
	 * Every line of {@code gffFile} that does not start with {@code #} is turned
	 * into a {@link GenomeFeature} and added via {@link #add(GenomeFeature)}.
	 * The reader is closed even if reading or parsing fails.
	 * <p>
	 * Note: only {@link #add(GenomeFeature)} is overridden, so the views
	 * ({@link #byId}, {@link #byLocation}, {@link #byEndPosition}) are maintained
	 * on insertion only; removing elements does not update them.
	 * 
	 * @param gffFile the annotation file to read, or {@code null} to create an empty set
	 * @param annotationSource the annotation source of this set; if non-null, its
	 *        {@code defaultName} becomes the {@link #name} of the set
	 * @param readPseudo if {@code false}, features whose {@code pseudo} attribute
	 *        equals {@code "true"} are skipped
	 * @throws IOException if the file cannot be opened or read
	 * @throws Exception declared by the original signature; presumably raised when a
	 *         line cannot be parsed into a GenomeFeature (confirm against GenomeFeature)
	 */
	public GenomeFeatureSet(File gffFile, AnnotationSource annotationSource, boolean readPseudo) throws IOException, Exception {
		super();
		this.annotationSource = annotationSource;
		if (annotationSource != null) {
			this.name = annotationSource.defaultName;
		}
		
		if (gffFile == null) {
			return; // nothing to read, leave the set empty
		}
		
		final BufferedReader in = new BufferedReader(new FileReader(gffFile));
		try {
			String line;
			while ((line = in.readLine()) != null) {
				if (line.startsWith("#")) {
					continue; // comment / directive line
				}
				final GenomeFeature gff = new GenomeFeature(line);
				// the pseudo attribute is only inspected when pseudo features are to be dropped
				if (readPseudo || !"true".equals(gff.getAtt("pseudo", ""))) {
					this.add(gff);
				}
			}
		} finally {
			// release the file handle also when reading or parsing throws
			in.close();
		}
	}

	/**
	 * Generates an id for a genome feature within this feature set.
	 * <p>
	 * The base of the id is the tag of the annotation source (short version), the
	 * result of the source's id manipulator, or - if the source has none - the id
	 * of the feature itself. To this base are appended, in this order:
	 * <ul>
	 * <li>{@code "_" + type}, if the source defines unmentioned types and the type
	 *     of the feature is not among them,</li>
	 * <li>the part of the {@code idRedundancy} attribute before {@code "of"}, if
	 *     the feature has that attribute,</li>
	 * <li>{@code "_p"}, if the {@code pseudo} attribute equals {@code "true"}.</li>
	 * </ul>
	 * Requires {@link #annotationSource} to be non-null.
	 * 
	 * @param gff the feature to generate an id for
	 * @param shortVersion whether to use the tag of the annotation source as base
	 * @return the generated id
	 */
	public String generateId(GenomeFeature gff, boolean shortVersion) {
		String newId;
		if (shortVersion) {
			newId = annotationSource.tag;
		} else if (annotationSource.idManipulator != null) {
			newId = annotationSource.idManipulator.generateId(gff);
		} else {
			newId = gff.getID();
		}
		
		final List<String> unmentioned = annotationSource.unmentionedTypes;
		if (unmentioned != null && !unmentioned.contains(gff.type)) {
			newId += "_" + gff.type;
		}
		if (gff.hasAtt("idRedundancy")) {
			newId += StringUtils.substringBefore(gff.getAtt("idRedundancy"), "of");
		}
		if ("true".equals(gff.getAtt("pseudo", "false"))) {
			newId += "_p";
		}
		return newId;
	}
	
	/**
	 * Adds a feature to the set and keeps the {@link #byId}, {@link #byLocation}
	 * and {@link #byEndPosition} views in sync.
	 * <p>
	 * A feature that is already contained in the set is rejected with a message on
	 * stderr. A new feature whose location is already present in
	 * {@link #byLocation} is added to the set and to {@link #byId}, but not to the
	 * location-based views; this is reported on stderr as well.
	 * 
	 * @param gff the feature to add
	 * @return {@code true} if the feature was not yet contained in the set
	 */
	@Override
	public boolean add(GenomeFeature gff) {
		if (!super.add(gff)) {
			System.err.println("duplicated genome feature not added to set: "+gff);
			return false;
		}
		
		// new entry
		byId.put(gff.getID(), gff);
		if (byLocation.containsKey(gff.location)) {
			System.err.println("duplicated genome feature location: "+gff);
			return true;
		}
		
		byLocation.put(gff.location, gff);
		try {
			final int signedEnd = gff.location.getSignedEnd();
			final GenomeLocation endLoc = new GenomeLocation(signedEnd, signedEnd, gff.location.chromosome);
			SortedSet<GenomeFeature> sameEnd = byEndPosition.get(endLoc);
			if (sameEnd == null) {
				sameEnd = new TreeSet<GenomeFeature>(GenomeFeature.comparatorLength);
				byEndPosition.put(endLoc, sameEnd);
			}
			sameEnd.add(gff);
		} catch (Exception e) {
			// best effort: the feature stays in the set, it is just missing from byEndPosition
			System.err.println("error indexing genome feature by end position: "+gff);
			e.printStackTrace();
		}
		return true;
	}
	
	
	/**
	 * The known annotation sources. Each source carries a short tag, a default
	 * display name, a color, the {@code extensionsOnly} flag, the list of
	 * unmentioned types and an {@link IdManipulator} used by
	 * {@link GenomeFeatureSet#generateId(GenomeFeature, boolean)}.
	 */
	public enum AnnotationSource {
		ORF("orf", "in-silico ORFs", Color.GRAY.brighter(), true, null, new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				return prefix+"ORF";
			}
		}),
		
		REFSEQ("refseq", "RefSeq", Color.BLUE, false, Arrays.asList("CDS","gene"), new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				return gff.getID();
			}
		}),
		
		REF("ref", "Reference", Color.BLUE, false, null, new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				return prefix+"REF";
			}
		}),
		
		ENSEMBL("ens", "Ensembl", Color.BLUE.darker().darker(), false, Arrays.asList("CDS"), new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				if (gff.getID() == null) {
					return prefix+"ENS"; // ensembl has some unlabeled features (id = null)
				}
				// only the part before the first space is used, e.g.
				// BH16570 {UniProtKB/Swiss-Prot:Q6G1M0} || blr0030 {UniProtKB/TrEMBL:Q89YC4}
				return prefix+"ENS"+gff.getID().split(" ")[0];
			}
		}),

		GENOSCOPE("geno", "Genoscope", Color.GREEN.darker(), false, Arrays.asList("CDS"), new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				// presumably Utils.rx matches the id against the pattern and exposes the
				// matcher as Utils.rxMatcher - confirm against Utils
				if (Utils.rx(gff.getID(), "[A-Z]{5}(\\d+)")) {
					return prefix+"GENO"+Utils.rxMatcher.group(1);
				} else {
					return prefix+"GENO"+gff.getID();
				}
			}
		}),
		
		CMR("cmr", "CMR", Color.RED, false, null, new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				// presumably Utils.rx matches the id against the pattern and exposes the
				// matcher as Utils.rxMatcher - confirm against Utils
				if (Utils.rx(gff.getID(), "NT\\d+[A-Z]+(\\d+)")) {
					return prefix+"CMR"+Utils.rxMatcher.group(1);
				} else {
					return prefix+"CMR"+gff.getID();
				}
			}
		}),
		
		CHEMGENOME("chemg", "ChemGenome", Color.YELLOW, false, null, new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				return prefix+"CHEMG";
			}
		}),
		
		SHORTORF("short", "ShortORFs", Color.PINK, false, null, new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				return prefix+"SHORT";
			}
		}),
		
		EASYGENE("easy", "EasyGene", Color.MAGENTA, false, null, new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				return prefix+"EASY";
			}
		}),
		
		PRODIGAL("prod", "Prodigal", Color.ORANGE, false, null, new IdManipulator() {
			@Override
			public String generateId(GenomeFeature gff) {
				return prefix+"PROD";
			}
		})
		
		;
		
		/**
		 * Prefix put in front of the ids generated by most id manipulators. It is
		 * shared by all sources and has to be set by the caller; while it is
		 * {@code null}, string concatenation renders it as {@code "null"}.
		 */
		public static String prefix;
		
		/** Default name of feature sets of this source. */
		public final String defaultName;
		/** Short tag of this source; also its {@link #toString()} and the key of {@link #byTag(String)}. */
		public final String tag;
		/** Color associated with this source. */
		public final Color color;
		/** Not used within this file, just for reference. */
		public final boolean extensionsOnly;
		/** Generates the base id of a feature of this source. */
		public final IdManipulator idManipulator;
		/**
		 * Defines the GenomeFeature types where the generateId function does not
		 * explicitly merge the type into the identifier. If {@code null}, the type
		 * is never merged into the identifier.
		 */
		public final List<String> unmentionedTypes;
		
		private AnnotationSource(String tag, String defaultName, Color color, boolean extensionsOnly, List<String> unmentionedTypes, IdManipulator idManipulator) {
			this.tag = tag;
			this.defaultName = defaultName;
			this.color = color;
			this.extensionsOnly = extensionsOnly;
			this.unmentionedTypes = unmentionedTypes;
			this.idManipulator = idManipulator;
		}
		
		/** @return the tag of this source */
		@Override
		public String toString() {
			return tag;
		}
		
		/**
		 * Looks up an annotation source by its tag.
		 * 
		 * @param tag the tag to look for
		 * @return the source with exactly this tag, or {@code null} if there is none
		 */
		public static AnnotationSource byTag(String tag) {
			for (AnnotationSource as : AnnotationSource.values()) {
				if (as.tag.equals(tag)) {
					return as;
				}
			}
			return null;
		}
	}
}
