/*
Copyright (c) 2011-2012 Daniel Marbach(1,2)

(1) Massachusetts Institute of Technology, Cambridge MA, USA
(2) Broad Institute of MIT and Harvard
 
We release this software open source under an MIT license (see below). If this
software was useful for your scientific work, please cite our paper available at:
http://compbio.mit.edu/flynet

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
 */
package edu.mit.compbio.flynet;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.HashMap;


/**
 * Translates FBgns, gene names, and CG numbers back and forth.
 * Implements the Singleton pattern.
 * <p>
 * The lookup tables are filled once, from the annotation file {@link #fbgnNamesFile_},
 * when the unique instance is created. While translating, the instance records which
 * labels were not found, mapped to a unique FBgn via a secondary id / custom symbol,
 * or mapped to several FBgns; see {@link #clearWarningLists()} and
 * {@link #printWarnings(String)}.
 */
public class Translator {

	/** The unique instance of Translator (Singleton design pattern) */
	private static Translator instance_ = null;

	/** The FlyBase file with the FBgn - annotation id table */
	String fbgnNamesFile_ = "resources/flybase/fbgn_annotation_ID_fb_2010_03.tsv";
	/** The number of header lines to be skipped in the fbgnNamesFile_ */
	int numHeaderLinesFbgnNamesFile_ = 5;

	/** Hash table (CG or gene symbol, FBgn) */
	private HashMap<String, String> cg2fb_ = null;
	/** Hash table (FBgn, CG) */
	private HashMap<String, String> fb2cg_ = null;
	/** Hash table (secondary CG, FBgns) */
	private HashMap<String, ArrayList<String>> secondaryCg2fb_ = null;
	/** Hash table (secondary FBgn, FBgns) */
	private HashMap<String, ArrayList<String>> secondaryFb2fb_ = null;
	/** Hash table (custom_symbol, FBgns) */
	private HashMap<String, ArrayList<String>> customSymbol2fb_ = null;
	
	/** Keep track of the secondary CGs/FBgns that could be mapped to unique FBgns */
	private LinkedHashSet<String> secondary2uniqueFbgn_ = null;
	/** Keep track of the secondary CGs/FBgns that were mapped to multiple FBgns */
	private LinkedHashSet<String> secondary2multipleFbgns_ = null;
	/** Keep track of CGs/FBgns that were not found */
	private LinkedHashSet<String> notFound_ = null;
	
	
	// ============================================================================
	// PUBLIC METHODS
	
	/**
	 * Create / get instance.
	 * The instance is created lazily on the first call (which reads the annotation file);
	 * no synchronization is performed here.
	 * 
	 * @return the unique Translator instance
	 */
	static public Translator getInstance() {
		
		if (instance_ == null)
			instance_ = new Translator();

		return instance_;
	}
	
	
	// ----------------------------------------------------------------------------

	/** Default constructor, initializes the hash tables */
	private Translator() {
		initializeHashMaps();
	}
	
	
	// ----------------------------------------------------------------------------

	/**
	 * Reset secondary2uniqueFbgn_, secondary2multipleFbgns_, and notFound_.
	 * Each list is re-created with a header label as its first element, so that the
	 * lists can be written directly as rows of the log file by printWarnings().
	 */
	public void clearWarningLists() {
		
		secondary2uniqueFbgn_ = new LinkedHashSet<String>();
		secondary2multipleFbgns_ = new LinkedHashSet<String>();
		notFound_  = new LinkedHashSet<String>();
		
		notFound_.add("Not_found:");
		secondary2uniqueFbgn_.add("Mapped_to_unique_FBgn:");
		secondary2multipleFbgns_.add("Mapped_to_multiple_FBgns:");
	}


	// ----------------------------------------------------------------------------

	/**
	 * Check if the given string is a primary FBgn.
	 * 
	 * @param fbgn the identifier to be tested
	 * @return true if fbgn was loaded as a primary FBgn from the annotation file
	 */
	public boolean isPrimaryFbgn(String fbgn) {
		return fb2cg_.containsKey(fbgn);
	}
	
	
	// ----------------------------------------------------------------------------

	/**
	 * Check if the given list of strings are all primary FBgns.
	 * 
	 * @param fbgn the identifiers to be tested
	 * @return true if every element is a primary FBgn (also true for an empty list)
	 */
	public boolean isPrimaryFbgn(ArrayList<String> fbgn) {
		
		for (int i=0; i<fbgn.size(); i++)
			if (!isPrimaryFbgn(fbgn.get(i)))
				return false;
		
		return true;
	}

	
	// ----------------------------------------------------------------------------

	/**
	 * Get the primary fbgn(s) of given cg or symbol, return null if not found.
	 * <p>
	 * The label is looked up, in this order, as: primary FBgn, primary CG / gene symbol,
	 * custom symbol, secondary CG, secondary FBgn. Only the last three kinds of hits
	 * (and misses) are recorded in the warning lists.
	 * 
	 * @param label an FBgn, CG number, gene symbol or custom symbol
	 * @return a new list with the primary FBgn(s) the label maps to, or null if the
	 *         label is unknown. The list is a copy, the caller may modify it freely.
	 */
	public ArrayList<String> toPrimaryFBgn(String label) {
		
		// If it's already a primary FBgn, simply return it
		if (isPrimaryFbgn(label)) {
			ArrayList<String> primary = new ArrayList<String>();
			primary.add(label);
			return primary;
		}

		// If it's a primary CG to primary FBgn mapping, return the FBgn
		if (cg2fb_.containsKey(label)) {
			ArrayList<String> primary = new ArrayList<String>();
			primary.add(cg2fb_.get(label));
			return primary;
		}
		
		// Otherwise try the custom symbols and the secondary ids
		ArrayList<String> fbgns = null;
		if (customSymbol2fb_.containsKey(label)) {
			fbgns = customSymbol2fb_.get(label);
		} else if (secondaryCg2fb_.containsKey(label)) {
			fbgns = secondaryCg2fb_.get(label);
		} else if (secondaryFb2fb_.containsKey(label)) {		
			fbgns = secondaryFb2fb_.get(label);
		}
		
		// Keep track of secondary labels that have been mapped
		if (fbgns == null) {
			notFound_.add(label);
			return null;
		}
		
		if (fbgns.size() == 1) 
			secondary2uniqueFbgn_.add(label);
		else if (fbgns.size() > 1)
			secondary2multipleFbgns_.add(label);
		else
			assert false; // the maps never hold empty lists

		// Return a copy so that callers cannot corrupt the internal lookup tables
		return new ArrayList<String>(fbgns);
	}

	
	// ----------------------------------------------------------------------------

	/**
	 * Check that all FBgns in the given column are primary FBgns (in the annotation file fbgnNamesFile_),
	 * map secondary FBgns to the corresponding primary FBgns.
	 * <p>
	 * The result is written to a new file whose name is derived from filename with the
	 * suffix "_mapped_c&lt;column+1&gt;" (via Flynet.insertBeforeFileExtension), and the warning
	 * lists are written to a log file with the suffix "_log_c&lt;column+1&gt;". A line whose label
	 * maps to several FBgns is written once per FBgn. The warning lists are cleared first.
	 * 
	 * @param filename the file to be translated
	 * @param column zero-based index of the column holding the labels
	 * @param numHeaderLines number of leading lines that are copied verbatim
	 * @param deleteLines if true, lines whose label is not found are dropped; if false,
	 *        they are written unchanged
	 * @throws IllegalArgumentException if column is negative
	 * @throws RuntimeException if a line does not have the requested column
	 */
	public void toPrimaryFBgn(String filename, int column, int numHeaderLines, boolean deleteLines) {
		
		// Fail before any file is created
		if (column < 0)
			throw new IllegalArgumentException("Column index must be >= 0, got " + column);
		
		clearWarningLists();

		Flynet.println("Mapping column " + (column+1) + " to primary FBgns, skipping the first " + numHeaderLines + " lines...");
		Flynet.println("Reading file " + filename + "...");
		FileParser parser = new FileParser(filename);
		
		try {
			String outputFile = Flynet.insertBeforeFileExtension(filename, "_mapped_c" + (column+1));
			Flynet.println("Writing file " + outputFile + "...");
			FileExport writer = new FileExport(outputFile);

			try {
				// Copy the header lines
				int l = 0; // number of lines read so far (= 1-based number of the current line)
				for (int i=0; i<numHeaderLines; i++) {
					String line = parser.readRawLine();
					writer.println(line);
					l++;
				}
				
				// For all lines
				while (true) {
					// Read the line, a null return ends the loop
					String[] currentLine = parser.readLine();
					if (currentLine == null)
						break;
					l++;
					
					// Check that the line has enough columns (index 'column' must exist)
					if (currentLine.length <= column)
						throw new RuntimeException("Translating column " + (column+1) + ", line " + l + " has only " + currentLine.length + " columns");
						
					String currentLabel = currentLine[column];
					ArrayList<String> fbgns = toPrimaryFBgn(currentLabel);
						
					// If this label was not found 
					if (fbgns == null) {
						if (deleteLines)
							continue;
						// Keep the line as it is
						fbgns = new ArrayList<String>();
						fbgns.add(currentLabel);
					}
					
					// Write a line for every FBgn mapped
					for (int i=0; i<fbgns.size(); i++) {
						currentLine[column] = fbgns.get(i);
						writer.println(currentLine);
					}		
				}
			} finally {
				// Release the output file even if translation failed half-way
				writer.close();
			}
		} finally {
			parser.close();
		}
		
		String logFile = Flynet.insertBeforeFileExtension(filename, "_log_c" + (column+1));
		printWarnings(logFile);
	}
		
	
	// ----------------------------------------------------------------------------
	
	/**
	 * Write the warning lists (not found, mapped to unique FBgn, mapped to multiple FBgns)
	 * to the given file, one list per line, and print a summary of their sizes.
	 * 
	 * @param filename the log file to be written
	 */
	public void printWarnings(String filename) {
		
		// construct the log
		ArrayList<String[]> log = new ArrayList<String[]>();
		log.add(notFound_.toArray(new String[0]));
		log.add(secondary2uniqueFbgn_.toArray(new String[0]));
		log.add(secondary2multipleFbgns_.toArray(new String[0]));

		// write the log
		Flynet.println("Writing file " + filename + "...");
		FileExport writer = new FileExport(filename);
		try {
			for (int i=0; i<log.size(); i++)
				writer.println(log.get(i));
		} finally {
			writer.close();
		}
		
		Flynet.println("Warnings (see log for details):");
		Flynet.println((notFound_.size()-1) + "\tnot found"); // -1 because we added the header column "Not_found"
		Flynet.println((secondary2uniqueFbgn_.size()-1) + "\tmapped to unique FBgn");
		Flynet.println((secondary2multipleFbgns_.size()-1) + "\tmapped to multiple FBgns");
	}

	
	// ============================================================================
	// PRIVATE METHODS

	/**
	 * Create all lookup tables and fill them from the annotation file fbgnNamesFile_.
	 * Only entries whose annotation id starts with "CG" or "CR" are loaded.
	 */
	private void initializeHashMaps() {
		
		// Create the maps
		cg2fb_ = new HashMap<String, String>();
		fb2cg_ = new HashMap<String, String>();
		secondaryCg2fb_ = new HashMap<String, ArrayList<String>>();
		secondaryFb2fb_ = new HashMap<String, ArrayList<String>>();
		
		initializeCustomSymbols();
		
		// Create lists keeping track of secondary CGs/FBgns
		clearWarningLists();
		
		// The columns in the raw data table
		final int colSymbol = 0;
		final int colFbgn = 1;
		final int col2ndFbgn = 2; // secondary fbgns
		final int colCg = 3;
		final int col2ndCg = 4; // secondary cgs
		
		Flynet.println("Reading annotation file " + fbgnNamesFile_);
		FileParser parser = new FileParser(fbgnNamesFile_);
		
		try {
			parser.skipLines(numHeaderLinesFbgnNamesFile_);
			
			while (true) {
				
				// The current line in the file
				String[] currentLine = parser.readLine();
				if (currentLine == null)
					break;
				
				// Skip lines that are too short to hold an annotation id (e.g. blank lines)
				if (currentLine.length <= colCg)
					continue;
				
				// Only add entries with a CG or CR id (protein coding and non-coding genes of dmel,
				// the other ids are for other species)
				String cg = currentLine[colCg];
				if (!cg.startsWith("CG") && !cg.startsWith("CR"))
					continue;
					
				String fbgn = currentLine[colFbgn];
				String symbol = currentLine[colSymbol];
				
				// (fbgn, cg)
				fb2cg_.put(fbgn, cg);

				// (cg, fbgn)
				cg2fb_.put(cg, fbgn);

				// (symbol, fbgn) If the gene has a symbol, we also add it to the cg2fb map
				if (!cg.equals(symbol))
					cg2fb_.put(symbol, fbgn);
				
				// (cg2, fbgn) (a cg2 may point to several fbgn)
				addSecondaryIds(secondaryCg2fb_, currentLine, col2ndCg, fbgn);
				
				// (fbgn2, fbgn) (a fbgn2 may point to several fbgn)
				addSecondaryIds(secondaryFb2fb_, currentLine, col2ndFbgn, fbgn);
			}
		} finally {
			parser.close();
		}
		
		// Can only be done now that the primary FBgns are known
		checkCustomSymbols();
	}


	// ----------------------------------------------------------------------------

	/**
	 * Register the comma-separated secondary ids found in the given column of a line
	 * as pointing to the given primary FBgn. Does nothing if the column is missing or empty.
	 * 
	 * @param map the table (secondary id, FBgns) to be updated
	 * @param line the columns of the current annotation line
	 * @param column index of the column with the comma-separated secondary ids
	 * @param fbgn the primary FBgn of the current annotation line
	 */
	private void addSecondaryIds(HashMap<String, ArrayList<String>> map, String[] line, int column, String fbgn) {
		
		if (line.length <= column || line[column].equals(""))
			return;
		
		String[] ids = line[column].split(",");
		for (int j=0; j<ids.length; j++) {
			
			// if this id is not yet in the map, create and add a new entry
			ArrayList<String> fbgnList = map.get(ids[j]);
			if (fbgnList == null) {
				fbgnList = new ArrayList<String>();
				map.put(ids[j], fbgnList);
			}
			fbgnList.add(fbgn);
		}
	}


	// ----------------------------------------------------------------------------

	/** Define some customized symbols, which are stored in their own table customSymbol2fb_ */
	private void initializeCustomSymbols() {

		// Define some customized symbols
		customSymbol2fb_ = new HashMap<String, ArrayList<String>>(5);
		
		// Motifs in Pouya's files
		
		String symbol = "AP-1"; // search in flybase leads to kay and Jra (both have AP-1 as a symbol synonym)
		ArrayList<String> ap1 = new ArrayList<String>();
		ap1.add("FBgn0001297"); // kay
		ap1.add("FBgn0001291"); // Jra
		customSymbol2fb_.put(symbol, ap1);
		
		symbol = "Dif/Rel";
		ArrayList<String> difrel = new ArrayList<String>();
		difrel.add("FBgn0011274"); // Dif
		difrel.add("FBgn0014018"); // Rel
		customSymbol2fb_.put(symbol, difrel);
		
		symbol = "EcR/usp";
		ArrayList<String> ecrusp = new ArrayList<String>();
		ecrusp.add("FBgn0000546"); // EcR
		ecrusp.add("FBgn0003964"); // usp
		customSymbol2fb_.put(symbol, ecrusp);

		symbol = "TorRE"; // Ask Pouya (in my old file I map this to FBgn0028386, but I don't know why, it's a different gene)
		ArrayList<String> torre = new ArrayList<String>();
		torre.add("FBgn0021796"); // Tor
		customSymbol2fb_.put(symbol, torre);

		// RedFly
		
		symbol = "Deb-A"; // RedFly symbol (not found because standard in flybase is DebA)
		ArrayList<String> deba = new ArrayList<String>();
		deba.add("FBgn0000425"); // DebA
		customSymbol2fb_.put(symbol, deba);
	}


	// ----------------------------------------------------------------------------

	/**
	 * Check whether the FBgns of the custom symbols are all primary FBgns
	 * (important because the annotation may change in the future).
	 * Must be called after the annotation file has been loaded. Offending FBgns are
	 * reported as warnings, they are not removed.
	 */
	private void checkCustomSymbols() {
		
		for (String symbol : customSymbol2fb_.keySet()) {
			ArrayList<String> fbgns = customSymbol2fb_.get(symbol);
			for (int i=0; i<fbgns.size(); i++)
				if (!isPrimaryFbgn(fbgns.get(i)))
					Flynet.println("WARNING: Custom symbol " + symbol + " maps to " + fbgns.get(i)
							+ ", which is not a primary FBgn in " + fbgnNamesFile_);
		}
	}
	
}
