package iptgxdb.converter;

import java.io.File;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;

import iptgxdb.utils.CLIUtils;

/**
 * Command-line entry point of the genome annotation converter.
 * <p>
 * Parses the command line, picks the converter matching the given input-format
 * option and delegates the actual work to {@link AConverter#convert}.
 */
public class Convert2GFF {

	/** Exit status used for every error path (matches the status already used for the missing-sequence error). */
	private static final int EXIT_ERROR = -1;

	/** Input-format options that {@link #main(String[])} can dispatch to a converter, in order of precedence. */
	private static final String[] SUPPORTED_INPUTS = { "chemgenome", "cmr", "ensembl", "genbank", "genoscope" };

	/**
	 * Input-format options that are declared in {@link #options} but for which
	 * {@link #main(String[])} has no converter wired up.
	 * NOTE(review): either wire up the matching converters or drop these options - confirm with the project owner.
	 */
	private static final String[] UNSUPPORTED_INPUTS = { "broad", "short", "repseek" };

	/** All command-line options understood by the converter. */
	public static Options options = buildOptions();

	/**
	 * Builds the option set. A plain {@link Options} instance is populated instead of
	 * using double-brace initialisation, which would create an anonymous subclass.
	 * <p>
	 * NOTE(review): the two boolean arguments of {@code CLIUtils.createArgOption} are
	 * presumably "required" and "accepts multiple values" - verify against CLIUtils.
	 *
	 * @return the populated option set
	 */
	private static Options buildOptions() {
		Options opts = new Options();
		opts.addOption( CLIUtils.createArgOption("chemgenome", "file", "one or more fasta files generated by ChemGenome 2.0", false, true) );
		opts.addOption( CLIUtils.createArgOption("cmr", "file", "a tab-separated tabular text file from CMR", false, false) );
		opts.addOption( CLIUtils.createArgOption("ensembl", "file", "a plain-text Ensembl file", false, false) );
		opts.addOption( CLIUtils.createArgOption("genbank", "file", "a plain-text GenBank file", false, false) );
		opts.addOption( CLIUtils.createArgOption("genoscope", "file", "a tab-separated tabular text file from Genoscope/MAGE", false, false) );
		opts.addOption( CLIUtils.createArgOption("broad", "file", "a BROAD genome summary per gene file", false, false) );
		opts.addOption( CLIUtils.createArgOption("short", "file", "a tab-separated short ORF file", false, false) );
		opts.addOption( CLIUtils.createArgOption("repseek", "file", "a repseek R-Table output file", false, false) );
		opts.addOption( CLIUtils.createArgOption("out", "output", "the output gff3 file", true, false) );
		opts.addOption( CLIUtils.createArgOption("id", "sequenceId", "the sequence id for the GFF file [default: guessed from input file]", false, false) );
		opts.addOption( CLIUtils.createArgOption("tab", "output", "a tab-separated output file with sequences per entry [requires seq]", false, false) );
		opts.addOption( CLIUtils.createArgOption("seq", "input", "the genomic sequence", false, false) );
		opts.addOption( CLIUtils.createArgOption("extend", "e", "extend all features 3' by <e> nucleotides or automatically if e='auto'", false, false) );
		opts.addOption( new Option("all", "convert extensive, i.e. all features and attributes") );
		return opts;
	}

	/** Prints the usage/help text for {@link #options} to standard output. */
	private static void printUsage() {
		new HelpFormatter().printHelp("java -jar iPtgxDB_convert.jar", "Genome Annotation Converter by Ulrich Omasits", options, null, true);
	}

	/**
	 * Prints the usage text and terminates the JVM with status 0.
	 * Kept with its original behaviour for existing callers; error paths inside
	 * this class use {@link #failWithUsage(String)} instead.
	 */
	public static void printUsageAndExit() {
		printUsage();
		System.exit(0);
	}

	/**
	 * Reports a command-line error on standard error, prints the usage text and
	 * terminates the JVM with a non-zero status so that calling scripts can detect the failure.
	 *
	 * @param message the error description shown to the user
	 */
	private static void failWithUsage(String message) {
		System.err.println(message);
		printUsage();
		System.exit(EXIT_ERROR);
	}

	/**
	 * Runs the converter.
	 * <p>
	 * If the first argument is the literal {@code debug}, the arguments are replaced
	 * by a hard-coded developer configuration.
	 *
	 * @param args the command-line arguments, see {@link #options}
	 * @throws Exception anything propagated from option handling or from the converter
	 */
	public static void main(String[] args) throws Exception {
		if (args.length>0 && args[0].equals("debug")) {
			// setting the debug arguments
			args = new String[]{
					"-ensembl","C:/temp/NCTC13400_E_coli_presub_v0.1.embl",
					"-out","C:/temp/NCTC13400_E_coli_presub_v0.2.gff3",
					"-id","NCTC13400",
					"-all"
			};
		}

		// parse the command line arguments
		CommandLine cli;
		try {
			cli = new PosixParser().parse( options, args );
		} catch (ParseException e) {
			failWithUsage(e.getMessage());
			return; // not reached (failWithUsage exits); makes the control flow explicit for the compiler
		}

		// check input (same call order as before, in case the CLIUtils helpers validate eagerly)
		File fOut = CLIUtils.getFileOption(cli, "out", true);
		File fTab = CLIUtils.getFileOption(cli, "tab", true);
		String seqId = cli.getOptionValue("id", null); //"Bhen_NC005956";

		File fSeq = CLIUtils.getFileOption(cli, "seq", false);
		if (fTab != null && fSeq == null) {
			System.err.println("ERROR: provide the genome sequence using the seq parameter!");
			System.exit(EXIT_ERROR);
			return;
		}

		// pick the input format: the first supported option present wins, any further ones are reported
		String inputOption = null;
		StringBuilder ignoredInputs = new StringBuilder();
		for (String candidate : SUPPORTED_INPUTS) {
			if (!cli.hasOption(candidate)) {
				continue;
			}
			if (inputOption == null) {
				inputOption = candidate;
			} else {
				ignoredInputs.append(" -").append(candidate);
			}
		}

		if (inputOption == null) {
			// give a precise hint when an advertised but unhandled format was requested
			for (String candidate : UNSUPPORTED_INPUTS) {
				if (cli.hasOption(candidate)) {
					failWithUsage("No converter is available for the -" + candidate + " input option!");
					return;
				}
			}
			failWithUsage("You have to specify ONE input file!");
			return;
		}
		if (ignoredInputs.length() > 0) {
			System.err.println("WARNING: more than one input format given; using -" + inputOption + " and ignoring" + ignoredInputs);
		}

		AConverter converter;
		if ("chemgenome".equals(inputOption)) {
			converter = new ChemGenome2GFF();
		} else if ("cmr".equals(inputOption)) {
			converter = new CMR2GFF();
		} else if ("ensembl".equals(inputOption)) {
			converter = new Ensembl2GFF();
		} else if ("genbank".equals(inputOption)) {
			converter = new GenBank2GFF();
		} else {
			// only "genoscope" is left in SUPPORTED_INPUTS
			converter = new Genoscope2GFF();
		}

		File[] fIn = CLIUtils.getFileArray(cli, inputOption);
		boolean extensive = cli.hasOption("all");
		String extend = cli.getOptionValue("extend");

		converter.convert(seqId, fIn, fOut, fTab, fSeq, extensive, extend);
	}
}
