/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.db.beans;
import org.rhwlab.db.MySql;
import java.sql.*;
import java.io.*;
import javax.xml.parsers.*;
import javax.xml.xpath.*;
import java.net.*;
import org.w3c.dom.*;
import java.util.*;
        
/**
 * A transcript record as held in the <code>Transcript</code> database table,
 * together with the maintenance routines that insert/update rows of that table
 * and resolve a transcript name to a gene identifier through the WormBase XML
 * service.
 *
 * All SQL that carries caller- or file-supplied values is issued through
 * prepared statements so that quotes in names cannot alter the statement.
 *
 * @author gevirl
 */
public class Transcript {
    /*
    public Transcript(String n,int l) throws Exception {
        this.name = n;
        this.length = l;
        if (builder == null){
            builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        }
        if (xpath == null){
            xpath = XPathFactory.newInstance().newXPath();
        }
    }
    */

    private static final String SELECT_BY_NAME = "Select * from Transcript where TranscriptName = ?";
    private static final String SELECT_BY_GENE = "Select * from Transcript where WBGene = ?";

    // XPath expressions tried, in this order, against a WormBase Gene_name document
    private static final String[] GENE_ID_PATHS = {
        "/ACEDB/Gene_name/Molecular_name_for/@value",
        "/ACEDB/Gene_name/Sequence_name_for/@value",
        "/ACEDB/Gene_name/Public_name_for/@value"
    };

    /**
     * Update the database with this transcript.
     *
     * If no row with this transcript's name exists, a new row holding the name
     * and length is inserted (no gene lookup is done for new rows). Otherwise
     * the gene is looked up with {@link #WBGeneFromWormbase()} and the row's
     * Length and WBGene columns are updated. When the lookup finds no gene,
     * WBGene is set to SQL NULL rather than to the text "null".
     *
     * @param db database access object used to obtain the statements
     * @throws Exception on any database, network or XML parsing failure
     */
    public void updateDB(MySql db) throws Exception {
        if (!recordExists(db)){
            // add a new record
            PreparedStatement insert = db.getStatement("Insert into Transcript (TranscriptName,Length) values (?,?)");
            try {
                bindString(insert, 1, this.name);
                bindInt(insert, 2, this.length);
                insert.executeUpdate();
            } finally {
                insert.close();
            }
        } else {
            String gene = this.WBGeneFromWormbase();
            PreparedStatement update = db.getStatement("Update Transcript set Length = ?, WBGene = ? where TranscriptName = ?");
            try {
                bindInt(update, 1, this.length);
                bindString(update, 2, gene);
                bindString(update, 3, this.name);
                update.executeUpdate();
            } finally {
                update.close();
            }
        }
    }

    // true when the Transcript table already has a row for this transcript's name
    private boolean recordExists(MySql db) throws Exception {
        PreparedStatement select = db.getStatement(SELECT_BY_NAME);
        try {
            bindString(select, 1, this.name);
            ResultSet rs = select.executeQuery();
            try {
                return rs.next();
            } finally {
                rs.close();
            }
        } finally {
            select.close();
        }
    }

    // bind a possibly-null string parameter (null becomes SQL NULL)
    private static void bindString(PreparedStatement stmt, int index, String value) throws SQLException {
        if (value == null){
            stmt.setNull(index, Types.VARCHAR);
        } else {
            stmt.setString(index, value);
        }
    }

    // bind a possibly-null integer parameter (null becomes SQL NULL)
    private static void bindInt(PreparedStatement stmt, int index, Integer value) throws SQLException {
        if (value == null){
            stmt.setNull(index, Types.INTEGER);
        } else {
            stmt.setInt(index, value.intValue());
        }
    }

    /**
     * Update the transcript table with the start and stop locations from a file.
     * last updated from file: /nfs/waterston/transcriptome/old/dcpm/transcript_chr_coords.txt
     *
     * Each non-blank line is split on tabs into: transcript name, chromosome,
     * start, end. Names that have no row in the table are printed to standard
     * output and skipped; for all others Start, End and Chrom are updated.
     *
     * @param file tab-delimited coordinates file
     * @throws IllegalArgumentException if a line for a known transcript has
     *         fewer than four fields (NumberFormatException if start/end are
     *         not integers)
     * @throws Exception on any I/O or database failure
     */
    static public void updateStartStop(File file)throws Exception {
        MySql db = MySql.getMySql();
        PreparedStatement update = db.getStatement("Update Transcript set Start = ? , End = ? , Chrom = ? where TranscriptName = ?");
        try {
            PreparedStatement lookup = db.getStatement(SELECT_BY_NAME);
            try {
                BufferedReader reader = new BufferedReader(new FileReader(file));
                try {
                    int lineNumber = 0;
                    for (String line = reader.readLine() ; line != null ; line = reader.readLine()){
                        ++lineNumber;
                        // a blank or tab-only line carries no transcript name
                        if (line.trim().length() == 0) continue;
                        applyCoordinates(lookup, update, line.split("\t"), lineNumber);
                    }
                } finally {
                    reader.close();
                }
            } finally {
                lookup.close();
            }
        } finally {
            update.close();
        }
    }

    // apply one parsed line of the coordinates file: report unknown names, update known ones
    private static void applyCoordinates(PreparedStatement lookup, PreparedStatement update, String[] tokens, int lineNumber) throws SQLException {
        boolean known;
        lookup.setString(1, tokens[0]);
        ResultSet rs = lookup.executeQuery();
        try {
            known = rs.next();
        } finally {
            rs.close();
        }
        if (!known){
            System.out.println(tokens[0]);
            return;
        }
        if (tokens.length < 4){
            throw new IllegalArgumentException(String.format(
                    "line %d: expected 4 tab-separated fields (name, chromosome, start, end) but found %d",
                    lineNumber, tokens.length));
        }
        update.setInt(1, Integer.parseInt(tokens[2]));
        update.setInt(2, Integer.parseInt(tokens[3]));
        update.setString(3, tokens[1]);
        update.setString(4, tokens[0]);
        update.execute();
    }
/*    
    // load the transcripts from a DCPM file into the database
    static public void loadDB(MySql db,File file) throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        String line = reader.readLine();
        while (line != null){
            String[] tokens = line.split(" ");
            System.err.println(tokens[0]);
            Transcript trans = new Transcript(tokens[0],Integer.valueOf(tokens[1]));
            trans.updateDB(db);
            line = reader.readLine();
        }
    }
    */

    /**
     * Get the names of the longest transcript of each gene in the database.
     * (The misspelled method name is kept so existing callers keep working.)
     *
     * Genes whose WBGene column is SQL NULL are ignored, and a transcript is
     * only considered when its Length is greater than zero. When several
     * transcripts of a gene share the maximum length, the first one returned
     * by the database is used.
     *
     * @param db database access object
     * @return sorted set of transcript names, one per gene that has a
     *         transcript of positive length; never null
     * @throws Exception on any database failure
     */
    static public TreeSet<String> longestTranscipts(MySql db) throws Exception {
        // read all gene ids first so that no result set stays open while
        // further queries are issued on the same database object
        List<String> genes = new ArrayList<String>();
        ResultSet rs = db.execute("Select distinct WBGene from Transcript");
        try {
            while (rs.next()){
                String gene = rs.getString("WBGene");
                if (gene != null) genes.add(gene);
            }
        } finally {
            rs.close();
        }

        TreeSet<String> transcripts = new TreeSet<String>();
        PreparedStatement byGene = db.getStatement(SELECT_BY_GENE);
        try {
            for (String gene : genes){
                String longest = longestForGene(byGene, gene, 0);
                if (longest != null) transcripts.add(longest);
            }
        } finally {
            byGene.close();
        }
        return transcripts;
    }

    /**
     * Get the name of the longest transcript of one gene.
     *
     * Unlike {@link #longestTranscipts(MySql)}, transcripts with a Length of
     * zero are eligible here.
     *
     * @param db database access object
     * @param wbGeneID value to match against the WBGene column
     * @return the transcript name, or null if the gene has no transcripts
     * @throws Exception on any database failure
     */
    static public String longestTranscript(MySql db,String wbGeneID) throws Exception {
        PreparedStatement byGene = db.getStatement(SELECT_BY_GENE);
        try {
            return longestForGene(byGene, wbGeneID, -1);
        } finally {
            byGene.close();
        }
    }

    // scan the rows of one gene and return the name of the first transcript whose
    // Length is the maximum and strictly greater than 'floor'; null if there is none
    private static String longestForGene(PreparedStatement byGene, String gene, int floor) throws SQLException {
        byGene.setString(1, gene);
        ResultSet rs = byGene.executeQuery();
        try {
            String longest = null;
            int best = floor;
            while (rs.next()){
                int l = rs.getInt("Length");
                if (l > best){
                    best = l;
                    longest = rs.getString("TranscriptName");
                }
            }
            return longest;
        } finally {
            rs.close();
        }
    }

    /**
     * Get the WBGeneID from WormBase for this transcript.
     *
     * This tries multiple configurations of the transcript name: the full
     * name, then the name with trailing dot-separated parts removed one at a
     * time. For each candidate that ends in 'a', 'b' or 'c', the candidate
     * without that last letter is tried as well. The result of the last
     * attempt is also stored in the WBGene field of this object.
     *
     * @return the first identifier found, or null if no candidate matched
     * @throws Exception on any network or XML parsing failure
     */
    public String WBGeneFromWormbase() throws Exception {

        String[] tokens = this.name.split("\\.");
        for (int n=tokens.length ; n>0 ; --n){
            // rebuild the first n dot-separated parts of the name
            StringBuilder build = new StringBuilder();
            for (int i=0 ; i<n ; ++i){
                if (i > 0) build.append('.');
                build.append(tokens[i]);
            }
            String geneName = build.toString();
            
            WBGene = tryGene(geneName);
            if (WBGene != null) return WBGene;
            
            if (geneName.endsWith("a")||geneName.endsWith("b")||geneName.endsWith("c")){
                // retry without the trailing letter
                geneName = geneName.substring(0, geneName.length()-1);
                WBGene = tryGene(geneName);
                if (WBGene != null) return WBGene;
            }
        }

        return null;
    }

    // try to find a gene in Wormbase; returns the first non-empty identifier
    // among the molecular, sequence and public name entries, or null
    private String tryGene(String geneName) throws Exception {
        // the parser and the xpath evaluator are created on first use; nothing
        // else in this class initializes them
        if (builder == null){
            builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        }
        if (xpath == null){
            xpath = XPathFactory.newInstance().newXPath();
        }

        URL url = new URL(String.format("http://www.wormbase.org/db/misc/xml?name=%s;class=Gene_name",
                URLEncoder.encode(geneName, "UTF-8")));
        Document doc;
        InputStream in = url.openStream();
        try {
            doc = builder.parse(in);
        } finally {
            in.close();
        }

        for (String path : GENE_ID_PATHS){
            String id = xpath.evaluate(path, doc);
            if (!id.equals("")) return id;
        }
        return null;
    }

    /**
     * @return the transcript name (may be null if it was never set)
     */
    public String getName(){
        return name;
    }

    /**
     * @return the transcript length (may be null if it was never set)
     */
    public Integer getLength(){
        return length;
    }
    
    /**
     * Command line entry point: updates start/stop/chromosome of the
     * transcripts from the tab-delimited file named by the first argument.
     * Failures are reported as a stack trace on standard error.
     *
     * @param args args[0] is the path of the coordinates file
     */
    public static void main(String[] args){
        if (args.length < 1){
            System.err.println("usage: Transcript <transcript coordinates file>");
            return;
        }
        try {
 //           MySql db = MySql.getMySql();
 //           Transcript.loadDB(db, new File(args[0]));
            Transcript.updateStartStop(new File(args[0]));
        } catch (Exception exc){
            exc.printStackTrace();
        }
    }
    String name;
    String WBGene;
    Integer length;
    String chromosome;
    Integer start;
    Integer end;
    
    // shared XML helpers, created lazily by tryGene
    static XPath xpath;
    static DocumentBuilder builder ;
}
