package iptgxdb.converter;

import java.awt.Color;
import java.io.BufferedReader;
import java.io.File;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;

import iptgxdb.utils.GenomeFeature;
import iptgxdb.utils.GenomeLocation;
import iptgxdb.utils.Utils;

/**
 * GenBank2GFF parses a plain-text GenBank file
 * (http://www.ncbi.nlm.nih.gov/genbank/)
 * and converts it to a GFF file.
 * <p>
 * Only the <code>LOCUS</code> line (for the sequence id) and the
 * <code>FEATURES</code> table of each record are read. Consecutive feature
 * entries that share the same location text are merged into one feature.
 * <code>source</code> features are never emitted. Unless <code>extensive</code>
 * is set, only the types in {@link #CONSIDERED_TYPES} are emitted, and
 * <code>gene</code> entries only when they carry a <code>pseudo</code> qualifier.
 * 
 * @author Ulrich Omasits
 * @date 10.11.2011
 */
public class GenBank2GFF extends AConverter {
	/** Leading blanks that every line inside a feature table starts with. */
	private static final String TABLE_INDENT = "     ";
	/** Index of the first character of the feature-key column. */
	private static final int KEY_COLUMN = 5;
	/** Index of the first character of the location / qualifier column. */
	private static final int BODY_COLUMN = 21;
	/** Key under which a feature's location text is kept in its attribute map. */
	private static final String LOCATION = "location";
	/** Feature types emitted in non-extensive mode (not extracted: rRNA, tRNA, misc_RNA). */
	private static final List<String> CONSIDERED_TYPES = Arrays.asList("CDS","gene"); // Arrays.asList("CDS","gene","rRNA","tRNA","misc_RNA")
	/** Qualifier preferred as feature ID. */
	private static final String ID_TAG = "locus_tag"; //"protein_id"

	/**
	 * Reads the given GenBank file and registers one genome feature per
	 * accepted feature-table entry via <code>addGenomeFeature</code>.
	 * 
	 * @param inputFile the GenBank flat file to read
	 * @throws Exception if reading fails or a feature cannot be constructed
	 */
	@Override
	protected void convert_internal(File inputFile) throws Exception {
		BufferedReader in = Utils.reader(inputFile);
		try {
			String line;
			String currSeqId = null;
			while ( (line = in.readLine()) != null ) {
				if (line.startsWith("LOCUS")) {
					if (seqId != null) {
						// an explicitly configured sequence id wins over the one in the file
						currSeqId = seqId;
					} else {
						// second whitespace-separated token of the LOCUS line
						currSeqId = Iterators.get(Splitter.on(CharMatcher.WHITESPACE).omitEmptyStrings().split(line).iterator(), 1);
					}
				} else if (line.startsWith("FEATURES")) {
					readFeatureTable(in, currSeqId);
				}
			}
		} finally {
			// close the reader even when parsing fails
			in.close();
		}
	}

	/**
	 * Consumes the lines of one feature table, starting with the line after
	 * the <code>FEATURES</code> header. The table ends at the first line that
	 * is not indented by {@link #TABLE_INDENT}, or at end of file; that
	 * terminating line is consumed as well.
	 * 
	 * @param in        reader positioned right after the FEATURES header line
	 * @param currSeqId sequence id assigned to every emitted feature
	 * @throws Exception if reading fails or a feature cannot be constructed
	 */
	private void readFeatureTable(BufferedReader in, String currSeqId) throws Exception {
		String currFeature = null;
		Map<String,String> attributes = null; // null until the first feature key is seen
		String currAttribute = null;
		String currValue = null;

		while ( true ) {
			String line = in.readLine();
			// end of file ends the table just like a non-indented line does
			boolean endOfTable = (line == null) || ! line.startsWith(TABLE_INDENT);
			String key = endOfTable ? "" : featureKey(line);
			String body = endOfTable ? "" : featureBody(line);
			boolean startsFeature = key.length() > 0;
			boolean startsQualifier = ! startsFeature && body.startsWith("/");

			// the attribute collected so far is complete once anything but a continuation line follows
			if (attributes != null && (endOfTable || startsFeature || startsQualifier)) {
				storeAttribute(attributes, currAttribute, currValue);
			}

			if (endOfTable) {
				if (attributes != null) {
					emitFeature(currFeature, attributes, currSeqId);
				}
				return;
			}

			if (startsFeature) {
				// NOTE: only the first line of a location is compared, so features whose
				// location wraps over several lines are never merged
				if (attributes != null && body.equals(attributes.get(LOCATION))) {
					// keep the attributes from previously annotated feature for same location
					attributes.remove(LOCATION); // will be re-added
					if ( ! currFeature.equalsIgnoreCase("CDS") && ! key.equalsIgnoreCase("misc_feature")) {
						// all but misc_features override previous feature name except for CDSs
						currFeature = key;
					}
				} else {
					if (attributes != null) {
						emitFeature(currFeature, attributes, currSeqId);
					}
					attributes = new LinkedHashMap<String, String>();
					currFeature = key;
				}
				currAttribute = LOCATION;
				currValue = body;
			} else if (attributes == null) {
				// text before the first feature key belongs to no feature: ignore it
			} else if (startsQualifier) {
				// split on the first '=' only, so that values containing '=' stay intact
				String[] arr = body.substring(1).split("=", 2);
				currAttribute = arr[0];
				if (arr.length > 1 && arr[1].length() > 0)
					currValue = arr[1];
				else
					currValue = "true"; // qualifier without a value acts as a flag
			} else if (body.length() > 0) {
				// continuation of the previous attribute; a wrapped location is
				// re-joined without a blank, any other value with a single blank
				if (LOCATION.equals(currAttribute))
					currValue += body;
				else
					currValue += " " + body;
			}
		}
	}

	/**
	 * Returns the trimmed feature-key column of a feature-table line, or an
	 * empty string if the line is too short to carry a location/qualifier column.
	 */
	private static String featureKey(String line) {
		return (line.length() > BODY_COLUMN) ? line.substring(KEY_COLUMN, BODY_COLUMN).trim() : "";
	}

	/**
	 * Returns the location/qualifier column of a feature-table line, or an
	 * empty string if the line is too short to have one.
	 */
	private static String featureBody(String line) {
		return (line.length() > BODY_COLUMN) ? line.substring(BODY_COLUMN) : "";
	}

	/**
	 * Stores a completed attribute. A repeated ID tag or <code>gene</code>
	 * qualifier keeps its first value (a differing value is only reported);
	 * any other repeated attribute gets the new value appended after a ';'.
	 */
	private static void storeAttribute(Map<String,String> attributes, String name, String value) {
		if ( ! attributes.containsKey(name)) {
			attributes.put(name, value);
			return;
		}
		String previous = attributes.get(name);
		if (name.equalsIgnoreCase(ID_TAG) || name.equalsIgnoreCase("gene")) {
			if (! previous.equals(value))
				System.out.println("WARN: different "+name+" values for same location ("+attributes.get("location")+"): "+previous+" vs. "+value);
		} else {
			attributes.put(name, previous + ";" + value);
		}
	}

	/**
	 * Registers the collected feature if its type passes the filters. A joined
	 * location is emitted as one <code>fCDS</code> feature per part, each tagged
	 * with an <code>idRedundancy</code> attribute of the form "1of3".
	 */
	private void emitFeature(String currFeature, Map<String,String> attributes, String currSeqId) throws Exception {
		if (currFeature.equalsIgnoreCase("source"))
			return;
		if ( ! (extensive || CONSIDERED_TYPES.contains(currFeature) || CONSIDERED_TYPES.size()==0))
			return;
		// for type=gene only pseudo entries are extracted
		if ( ! (extensive || ! currFeature.equalsIgnoreCase("gene") || attributes.containsKey("pseudo")))
			return;

		String location = attributes.get(LOCATION);
		if (location.startsWith("join(") || location.startsWith("complement(join(")) {
			// joined location -> multiple fCDS entries
			if (!attributes.containsKey("pseudo"))
				System.err.println("ERROR: found split non-pseudo feature at "+location);
			String[] locationParts = splitJoinedLocation(location);

			for (int i=0; i<locationParts.length; i++) {
				if (location.startsWith("complement("))
					locationParts[i] = "complement("+locationParts[i]+")";
				GenomeFeature gf = new GenomeFeature(currSeqId, source, "fCDS", new GenomeLocation(locationParts[i], currSeqId), null, null); // override to fCDS
				gf.setAtt("idRedundancy", i+1 + "of" + locationParts.length);
				applyAttributesAndId(gf, attributes);
				this.addGenomeFeature(gf);
			}
		} else {
			// standard location
			GenomeFeature gf = new GenomeFeature(currSeqId, source, currFeature, new GenomeLocation(location, currSeqId), null, null);
			applyAttributesAndId(gf, attributes);
			this.addGenomeFeature(gf);
		}
	}

	/**
	 * Returns the comma-separated parts of a <code>join(...)</code> or
	 * <code>complement(join(...))</code> location. The text between the
	 * <code>join(</code> and its closing parenthesis is split, so parts that
	 * are themselves wrapped in <code>complement(...)</code> are kept whole.
	 */
	private static String[] splitJoinedLocation(String location) {
		int start = location.indexOf("join(") + "join(".length();
		int end = location.lastIndexOf(')');
		if (location.startsWith("complement(")) {
			// the last ')' closes the outer complement; the join closes one before it
			end = location.lastIndexOf(')', end - 1);
		}
		return location.substring(start, end).split(",");
	}

	/**
	 * Copies attributes onto the feature (all of them in extensive mode,
	 * otherwise only the <code>pseudo</code> flag) and sets its ID from the
	 * first available of: ID tag, <code>gene</code>, <code>ID</code>.
	 */
	private void applyAttributesAndId(GenomeFeature gf, Map<String,String> attributes) throws Exception {
		if (extensive)
			gf.atts.putAll(attributes);
		else if (attributes.containsKey("pseudo"))
			gf.atts.put("pseudo", "true");

		if (attributes.containsKey(ID_TAG))
			gf.setID(attributes.get(ID_TAG));
		else if (attributes.containsKey("gene"))
			gf.setID(attributes.get("gene"));
		else if (attributes.containsKey("ID"))
			gf.setID(attributes.get("ID"));
	}
	
	/** Returns the display color used for features produced by this converter. */
	@Override
	protected Color getColor() {
		return Color.BLUE;
	}
}
