package edu.mit.compbio.flynet.genome;

import edu.mit.compbio.flynet.*;



/**
 * Parser for gff files.
 * <p>
 * Each call to {@link #readLineGff(Region)} reads one line through the inherited readLine()
 * and keeps its columns in {@link #currentLine_}. The remaining accessors (chromosome,
 * features, subfeatures) all operate on that current line, so they must only be called
 * after readLineGff() has returned true.
 */
public class ParserGff extends FileParser {

	/** The columns of the current line, as returned by the last call to readLine() (null before the first read and after the last line) */
	String[] currentLine_ = null;
	
	/** Number of columns expected in a gff line */
	static private final int NUM_COLUMNS = 9;
	/** Column where the chromosome is specified in gff format */
	static private final int CHROMOSOME = 0;
	/** Column with the start */
	static private final int START = 3;
	/** Column with the end */
	static private final int END = 4;
	/** Column with additional features (the strand column is currently not read by this parser) */
	static private final int FEATURES = 8;
	
	
	// ============================================================================
	// PUBLIC METHODS
	
	/** 
	 * Constructor, the filename is passed on unchanged to the FileParser constructor.
	 * @param filename the gff file to be parsed
	 */
	public ParserGff(String filename) {
	
		super(filename);
	}
	
	
	// ----------------------------------------------------------------------------

	/** 
	 * Read the next line and initialize the given region with the start and end found in it.
	 * The columns of the line are kept as the current line, so that getFeature(), 
	 * getSubFeature() and getChromosomeString() can be used afterwards.
	 * @param region the region on which setRegion(start, end) is called
	 * @return true if a line was read, false if readLine() returned null (no line available)
	 * @throws NumberFormatException if the start or end column is not an integer
	 */
	public boolean readLineGff(Region region) {
		
		currentLine_ = readLine();
		if (currentLine_ == null)
			return false;
		
		// only checked when assertions are enabled (-ea)
		assert currentLine_.length == NUM_COLUMNS : "Expected " + NUM_COLUMNS + " columns in line " + lineCounter_;

		int start = Integer.parseInt(currentLine_[START]);
		int end = Integer.parseInt(currentLine_[END]);
		
		region.setRegion(start, end);
		
		return true;
	}
	
	
	// ----------------------------------------------------------------------------

	/** 
	 * Get the feature with the given key, reports an error through Flynet.error() if the 
	 * feature is not present.
	 * In gff, features are concatenated in one string in the last column. The syntax is:
	 * {key=["]<feature>["];} 
	 * <p>
	 * An occurrence of the key that starts a feature (at the beginning of the string, or after
	 * a ';' or whitespace) and is followed by '=' is preferred, so that a key that is also
	 * contained in another key or in a value (e.g. "ID" in "Parent_ID") does not shadow the 
	 * real one. If there is no such occurrence, the first occurrence of the key is used.
	 * @param key the name of the feature
	 * @return the feature, without the enclosing quotes (if any)
	 */
	public String getFeature(String key) {
		
		String featureStr = currentLine_[FEATURES];
		
		// find where the key starts, fall back on the first plain occurrence
		int index = indexOfKey(featureStr, key, '=', ";");
		if (index == -1)
			index = featureStr.indexOf(key);
		
		// check that the key is present
		if (index == -1)
			Flynet.error(new RuntimeException("Key '" + key + "' not found in the features of line " + lineCounter_));
		
		// check that the key is followed by '=' (the key may also be the very end of the string)
		int separator = index + key.length();
		if (separator >= featureStr.length() || featureStr.charAt(separator) != '=')
			Flynet.error(new RuntimeException("Key '" + key + "' not followed by '=' in line " + lineCounter_));
		
		// features are delimited by ';'
		int start = separator + 1; // +1 because of the '='
		int end = featureStr.indexOf(';', start);
		
		// check that the feature is correctly delimited
		if (end == -1)
			Flynet.error(new RuntimeException("Feature of key '" + key + "' not delimited by ';' in line " + lineCounter_));
		
		// features can optionally be enclosed in quotes (never let start and end cross, 
		// which would happen for a feature consisting of a single quote)
		if (start < end && featureStr.charAt(start) == '"')
			start++;
		if (end > start && featureStr.charAt(end-1) == '"')
			end--;
		
		// get the feature
		return featureStr.substring(start, end);
	}

	
	// ----------------------------------------------------------------------------

	/** 
	 * Get a subfeature, reports an error through Flynet.error() if it is not present. 
	 * Syntax for subfeatures:
	 * <feature> = ["]subkey:<subfeature>["] {, ["]subkey:<subfeature>["]}
	 * <p>
	 * As in getFeature(), an occurrence of the subkey that starts a subfeature (at the 
	 * beginning, or after a ',', a quote or whitespace) and is followed by ':' is preferred 
	 * over an occurrence inside another subkey or value.
	 * @param key the name of the feature
	 * @param subkey the name of the subfeature within that feature
	 * @return the subfeature, without trailing whitespace and without the closing quote (if any)
	 */
	public String getSubFeature(String key, String subkey) {

		// first, get the whole feature
		String feature = getFeature(key);
		
		// find where the subkey starts, fall back on the first plain occurrence
		int index = indexOfKey(feature, subkey, ':', ",\"");
		if (index == -1)
			index = feature.indexOf(subkey);
		
		// check that the key is present
		if (index == -1)
			Flynet.error(new RuntimeException("Subkey '" + subkey + "' not found in the subfeatures of line " + lineCounter_));
		
		// check that the key is followed by ':' (the subkey may also be the very end of the string)
		int separator = index + subkey.length();
		if (separator >= feature.length() || feature.charAt(separator) != ':')
			Flynet.error(new RuntimeException("Subkey '" + subkey + "' not followed by ':' in line " + lineCounter_));
		
		// subfeatures are optionally delimited by ','
		int start = separator + 1; // +1 because of the ':'
		int end = feature.indexOf(',', start);
		// if there is no ',', the subfeature extends to the end of the string. Note that end is
		// exclusive in substring(), so it has to be length() and not length()-1 (which would 
		// cut off the last character of the subfeature)
		if (end == -1)
			end = feature.length();
		
		// ignore whitespace between the subfeature and its delimiter
		while (end > start && Character.isWhitespace(feature.charAt(end-1)))
			end--;
		// subkey:subfeature can optionally be enclosed in quotes
		if (end > start && feature.charAt(end-1) == '"')
			end--;
		
		// get the subfeature
		return feature.substring(start, end);
	}

	
	// ----------------------------------------------------------------------------

	/** Get the chromosome (first column) from the current line */
	public String getChromosomeString() {
		
		return currentLine_[CHROMOSOME];
	}
	

	// ============================================================================
	// PRIVATE METHODS

	/**
	 * Find the first occurrence of the key in the text that (1) is at the start of the text or
	 * directly preceded by whitespace or one of the given delimiters, and (2) is directly 
	 * followed by the given separator.
	 * @return the index where the key starts, or -1 if there is no such occurrence
	 */
	static private int indexOfKey(String text, String key, char separator, String delimiters) {
		
		// from always advances by at least one, so the loop also terminates for an empty key
		for (int from = 0; from <= text.length(); ) {
			int pos = text.indexOf(key, from);
			if (pos == -1)
				return -1;
			
			boolean atTokenStart = (pos == 0);
			if (!atTokenStart) {
				char previous = text.charAt(pos-1);
				atTokenStart = Character.isWhitespace(previous) || delimiters.indexOf(previous) != -1;
			}
			
			int after = pos + key.length();
			boolean hasSeparator = after < text.length() && text.charAt(after) == separator;
			
			if (atTokenStart && hasSeparator)
				return pos;
			
			from = pos + 1;
		}
		return -1;
	}
	

	// ============================================================================
	// GETTERS AND SETTERS

	/** Get the columns of the current line (the internal array, not a copy) */
	public String[] getCurrentLine() { return currentLine_; }


}
