/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.chipseq;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * Converts a multi-line-per-record mapping document into a tab-delimited
 * fastq summary table, and records which experiments and replicates were
 * seen so they can be compared against a {@link QualityDoc}.
 *
 * <p>Each record in the input occupies two to four physical lines (see
 * {@link #read(BufferedReader)}). The second line is the tab-separated data
 * row; the fourth, when present, is a comma-separated list of
 * {@code id:fileName} fastq entries.
 *
 * @author gevirl
 */
public class StanfordDoc {
    /** Mapping document used by {@link #main(String[])} when args[0] is absent. */
    private static final String DEFAULT_MAPPING_DOC =
            "/net/waterston/vol9/ChipSeq/olddata/ENCODEDataMapping";
    /** Output file used by {@link #main(String[])} when args[1] is absent. */
    private static final String DEFAULT_OUTPUT_FILE =
            "/net/waterston/vol9/ChipSeq/olddata/modencodeFastqSummary.2";
    /** Quality document used by {@link #main(String[])} when args[2] is absent. */
    private static final String DEFAULT_QUALITY_DOC =
            "/net/waterston/vol9/ChipSeq/olddata/Completed_factors_for_old_ModENCODE.csv";

    /**
     * @param file path of the mapping document to convert
     */
    public StanfordDoc(String file) {
        this.file = file;
    }

    /**
     * Reads the mapping document and writes one output row per fastq entry.
     *
     * <p>For every record the experiment key (data-row columns 1-5 joined
     * with '_') is added to the experiment set and checked against the
     * quality document; mismatches are reported on standard output. Records
     * without a fastq line produce no output rows.
     *
     * @param writer  destination for the summary table; it is flushed but
     *                not closed by this method
     * @param quality source of the accepted / rejected experiment keys
     * @throws Exception if the input cannot be read or a data row has fewer
     *                   than seven columns
     */
    public void convert(PrintWriter writer, QualityDoc quality) throws Exception {
        writer.println("TF\tStrain\tStage\tInstitute\tMethod\tAntibody\tRep\tFastq\tmodencodeID");
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            read(reader);  // skip the heading
            int count = 0;
            for (String[] lines = read(reader); lines != null; lines = read(reader)) {
                String[] tokens = lines[1].split("\t");

                String exper = String.format("%s_%s_%s_%s_%s",
                        tokens[1], tokens[2], tokens[3], tokens[4], tokens[5]);
                expSet.add(exper);

                if (quality.getExperRejected().contains(exper)) {
                    System.out.printf("Mapping is Quality Rejected: %s\n", exper);
                }
                if (!quality.getExperAccepted().contains(exper)) {
                    ++count;
                    System.out.printf("%d Mapping is Not Quality Accepted: %s\n", count, exper);
                }

                if (lines[3] != null) {
                    writeFastqRows(writer, tokens, lines);
                }
            }
        } finally {
            // Ensure the heading (and any rows) reach the file even when no
            // fastq row was written or the conversion aborted part-way.
            writer.flush();
        }
    }

    /**
     * Writes the output rows for one record's fastq line. A record whose
     * data cannot be parsed is reported and skipped rather than aborting the
     * whole conversion; rows already written for that record are kept.
     */
    private void writeFastqRows(PrintWriter writer, String[] tokens, String[] lines) {
        try {
            // Column 6 must be an integer; the value itself is not used, the
            // parse only rejects records whose replicate count is malformed.
            Integer.parseInt(tokens[6]);

            // Strain OP355 is always reported with TF name KLU-1.
            if (tokens[1].equals("OP355")) {
                tokens[2] = "KLU-1";
            }

            for (String fastq : lines[3].split(",")) {
                String[] splits = fastq.split(":");
                String id = splits[0];
                String fileName = splits[1];
                if (!fileName.endsWith(".gz")) {
                    fileName = fileName + ".gz";
                }
                // The id is '_'-separated: field 5 is the replicate, fields
                // 6 and 7 form the value written to the Antibody column.
                String[] idTokens = id.split("_");
                String rep = idTokens[5];
                repSet.add(String.format("%s_%s_%s_%s_%s_%s_%s",
                        idTokens[0], idTokens[1], idTokens[2], idTokens[3], idTokens[4],
                        idTokens[6], idTokens[7]));
                String source = String.format("%s_%s", idTokens[6], idTokens[7]);

                writer.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\thttp://encodedcc.stanford.edu/ftp/modENCODE_VS_ENCODE/Regulation/Worm/fastq/%s\t%s\n",
                        tokens[2], tokens[1], tokens[3], tokens[4], tokens[5], source, rep, fileName, tokens[7]);
                writer.flush();
            }
        } catch (RuntimeException exc) {
            // Best effort: report the offending data row and carry on.
            System.out.println(lines[1]);
            System.err.println("  skipped record: " + exc);
        }
    }

    /**
     * Reads the physical lines that make up the next record.
     *
     * <p>The returned array always has four slots. Slot 0 is the record's
     * first line and slot 1 its tab-separated data row. If column 6 of the
     * data row is "-", the record ends there and slots 2 and 3 are null.
     * Otherwise slot 3 holds the following fastq line, preceded by one extra
     * line in slot 2 unless the row has exactly eight columns and column 7
     * is "--".
     *
     * @return the record's lines, or null at end of input (including the
     *         case where the input ends after a record's first line)
     * @throws Exception if reading fails or the data row has fewer than
     *                   seven columns
     */
    private String[] read(BufferedReader reader) throws Exception {
        String[] lines = new String[4];
        lines[0] = reader.readLine();
        if (lines[0] == null) {
            return null;
        }

        lines[1] = reader.readLine();
        if (lines[1] == null) {
            // Dangling line at end of input (e.g. a trailing blank line):
            // there is no data row, so there is no record to return.
            return null;
        }
        String[] tokens = lines[1].split("\t");
        if (tokens.length < 7) {
            throw new IOException("Malformed record in " + file
                    + ": expected at least 7 tab-separated columns but found "
                    + tokens.length + ": " + lines[1]);
        }
        if (tokens[6].equals("-")) {
            return lines;  // slots 2 and 3 stay null
        }
        boolean skipExtraLine = tokens.length == 8 && tokens[7].equals("--");
        if (!skipExtraLine) {
            lines[2] = reader.readLine();
        }
        lines[3] = reader.readLine();
        return lines;
    }

    /**
     * @return the replicate keys collected by {@link #convert}; retained
     *         under its original (misspelled) name for existing callers
     */
    public TreeSet<String> getRespSet() {
        return this.repSet;
    }

    /**
     * @return the replicate keys collected by {@link #convert}
     */
    public TreeSet<String> getRepSet() {
        return this.repSet;
    }

    /**
     * @return the experiment keys collected by {@link #convert}
     */
    public TreeSet<String> getExpSet() {
        return this.expSet;
    }

    /**
     * Runs the conversion and cross-checks the result against the quality
     * document. Each argument is optional and falls back to a built-in
     * default path when omitted.
     *
     * @param args args[0] - input doc;
     *             args[1] - output converted file;
     *             args[2] - quality filter doc
     */
    public static void main(String[] args) throws Exception {
        String mappingPath = args.length > 0 ? args[0] : DEFAULT_MAPPING_DOC;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_FILE;
        String qualityPath = args.length > 2 ? args[2] : DEFAULT_QUALITY_DOC;

        QualityDoc qualDoc = new QualityDoc(qualityPath);
        qualDoc.init();

        // report the quality experiments accepted
        try (PrintWriter writer = new PrintWriter("QualityExperiments")) {
            for (String exper : qualDoc.getExperAccepted()) {
                writer.println(exper);
            }
        }

        StanfordDoc doc = new StanfordDoc(mappingPath);
        try (PrintWriter out = new PrintWriter(outputPath)) {
            doc.convert(out, qualDoc);
        }

        // report the experiments found in the mapping document
        try (PrintWriter writer = new PrintWriter("MappingExperiments")) {
            for (String exper : doc.getExpSet()) {
                writer.println(exper);
            }
        }

        // check the results: every accepted quality replicate should also
        // appear in the mapping file
        TreeSet<String> set = doc.getRespSet();
        int count = 0;
        for (String qualityRep : qualDoc.getRepsAccepted()) {
            if (!set.contains(qualityRep)) {
                ++count;
                System.out.printf("%d Quality missing from mapping: %s\n", count, qualityRep);
            }
        }
    }

    /** Path of the mapping document to convert. */
    String file;
    /** Replicate keys seen in fastq ids (id fields 0-4, 6 and 7 joined with '_'). */
    TreeSet<String> repSet = new TreeSet<>();
    /** Experiment keys seen in data rows (columns 1-5 joined with '_'). */
    TreeSet<String> expSet = new TreeSet<>();
}
