/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.LMS.sci;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.sql.Date;
import java.sql.PreparedStatement;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.rhwlab.db.MySql;

/**
 * Attaches the barcode and UMI found in each R1 read to the header of the
 * paired R2 read, for a single sample.
 * <p>
 * For every record, the first {@link #UMI_LENGTH} characters of the R1
 * sequence line are taken as the UMI and the remainder as the barcode
 * candidate. When the candidate is a known barcode, or is at Levenshtein
 * distance 1 from one, the R2 record is written to
 * {@code <outDir>/<sample>.R2.fastq.gz} with its header rewritten to
 * {@code @<barcode>,<umi>,<original header without '@'>}. Reads whose
 * candidate matches no barcode are dropped.
 * <p>
 * {@link #main(String[])} runs one task per sample found in the input
 * directory and then records the completion time in the database.
 *
 * @author gevirl
 */
public class UMI_attach implements Callable<Object> {
    /** Number of leading R1 sequence characters that form the UMI. */
    private static final int UMI_LENGTH = 8;
    /** Number of lines read per FASTQ record. */
    private static final int RECORD_LINES = 4;

    String sample;
    File[] files;
    TreeSet<String> barcodes;
    File outDir;
    
    /**
     * @param sample   sample name; used to pick the input files and to name the output file
     * @param files    candidate input files (the R1 and R2 files of the sample are searched for here)
     * @param barcodes the set of accepted barcodes
     * @param outDir   directory that receives {@code <sample>.R2.fastq.gz}
     */
    public UMI_attach(String sample, File[] files, TreeSet<String> barcodes, File outDir){
        this.sample = sample;
        this.files = files;
        this.barcodes = barcodes;
        this.outDir = outDir;
    }

    /**
     * Processes the R1/R2 pair of this sample and writes the annotated R2 file.
     *
     * @return always {@code null}
     * @throws IOException if an input file is missing, the two files are not
     *         record-for-record paired, a record is truncated, or the output
     *         could not be written completely
     */
    @Override
    public Object call() throws Exception {
        File[] pair = findReadFiles();
        File r1 = pair[0];
        File r2 = pair[1];
        if (r1 == null || r2 == null){
            throw new IOException(String.format("Sample %s: no %s fastq file found",
                    sample, r1 == null ? "R1" : "R2"));
        }

        // Only "distance == 1" matters, so bound the computation: with a
        // threshold of 1, apply() returns -1 as soon as the distance exceeds 1.
        LevenshteinDistance leven = new LevenshteinDistance(1);

        File outFile = new File(outDir,String.format("%s.R2.fastq.gz",sample));
        String[] r1Lines = new String[RECORD_LINES];
        String[] r2Lines = new String[RECORD_LINES];

        // The raw file streams are listed as resources too, so they are
        // released even if constructing a wrapping stream fails.
        try (InputStream raw1 = new FileInputStream(r1);
             BufferedReader read1 = new BufferedReader(new InputStreamReader(new GzipCompressorInputStream(raw1,true)));
             InputStream raw2 = new FileInputStream(r2);
             BufferedReader read2 = new BufferedReader(new InputStreamReader(new GzipCompressorInputStream(raw2,true)));
             FileOutputStream rawOut = new FileOutputStream(outFile);
             PrintStream outStream = new PrintStream(new GzipCompressorOutputStream(rawOut))) {

            readInto(read1,r1Lines);
            readInto(read2,r2Lines);
            while (r1Lines[0] != null){
                if (r1Lines[RECORD_LINES-1] == null){
                    throw new IOException("Truncated fastq record at end of " + r1);
                }
                if (r2Lines[0] == null){
                    throw new IOException(r2 + " has fewer reads than " + r1);
                }
                if (r2Lines[RECORD_LINES-1] == null || r2Lines[0].isEmpty()){
                    throw new IOException("Truncated or malformed fastq record in " + r2);
                }

                String sequence = r1Lines[1];
                // A read too short to hold a UMI cannot carry a barcode; drop it
                // like any other unmatched read.
                if (sequence.length() >= UMI_LENGTH){
                    String umi = sequence.substring(0,UMI_LENGTH);
                    String target = sequence.substring(UMI_LENGTH);
                    String barcode = matchBarcode(target,leven);
                    if (barcode != null){
                        outputAttached(barcode,umi,r2Lines,outStream);
                    }
                }
                readInto(read1,r1Lines);
                readInto(read2,r2Lines);
            }
            if (r2Lines[0] != null){
                throw new IOException(r2 + " has more reads than " + r1);
            }

            // PrintStream never throws on write failure; it only records it.
            // Close first so the final compressed data is written, then check.
            outStream.close();
            if (outStream.checkError()){
                throw new IOException("Failed writing " + outFile);
            }
        }
        return null;
    }

    /**
     * Locates the R1 and R2 files of this sample among {@link #files}.
     * <p>
     * A file named {@code <sample>.<rest>} whose {@code rest} contains the read
     * tag is preferred, so that sample "A1" does not pick up files of "A10" and
     * a sample name that itself contains "R1"/"R2" is not mistaken for the tag.
     * If no such file exists, any file whose name contains both the sample
     * name and the tag is accepted.
     *
     * @return a two element array: index 0 is the R1 file, index 1 the R2 file;
     *         an element is {@code null} when no file was found
     */
    private File[] findReadFiles(){
        File exactR1 = null;
        File exactR2 = null;
        File looseR1 = null;
        File looseR2 = null;
        if (files != null && sample != null){
            String prefix = sample + ".";
            for (File file : files){
                String name = file.getName();
                if (!name.contains(sample)){
                    continue;
                }
                String rest = name.startsWith(prefix) ? name.substring(sample.length()) : null;
                if (rest != null && rest.contains("R1")){
                    exactR1 = file;
                } else if (rest != null && rest.contains("R2")){
                    exactR2 = file;
                } else if (name.contains("R1")){
                    looseR1 = file;
                } else if (name.contains("R2")){
                    looseR2 = file;
                }
            }
        }
        return new File[]{
            exactR1 != null ? exactR1 : looseR1,
            exactR2 != null ? exactR2 : looseR2
        };
    }

    /**
     * Resolves a barcode candidate to a known barcode.
     *
     * @param target the candidate read from the R1 sequence
     * @param leven  distance function limited to a threshold of 1
     * @return {@code target} itself when it is a known barcode, otherwise the
     *         first known barcode at distance 1, or {@code null} if there is none
     */
    private String matchBarcode(String target,LevenshteinDistance leven){
        if (barcodes.contains(target)){
            return target;
        }
        for (String barcode : barcodes){
            if (leven.apply(barcode, target) == 1){
                return barcode;
            }
        }
        return null;
    }

    /**
     * Writes one R2 record with the barcode and UMI prepended to its header.
     *
     * @param barcode the resolved barcode
     * @param umi     the UMI taken from the R1 read
     * @param r2      the four lines of the R2 record; the first character of
     *                line 0 is dropped and replaced by {@code @barcode,umi,}
     * @param stream  destination
     */
    private void outputAttached(String barcode,String umi,String[] r2,PrintStream stream){
        stream.printf("@%s,%s,%s\n", barcode,umi,r2[0].substring(1));
        stream.println(r2[1]);
        stream.println(r2[2]);
        stream.println(r2[3]);
    }

    /**
     * Fills {@code into} with the next {@code into.length} lines of the reader.
     * Positions past the end of the input are set to {@code null}.
     */
    static void readInto(BufferedReader reader,String[] into)throws Exception {
        for (int i=0 ; i<into.length ; ++i){
            into[i] = reader.readLine();
        }
    }
    
    
    // args[0] - input fastq directory
    // args[1] - rt barcode file
    // args[2] - output directory - UMIattach directory 
    // args[3] - seq id
    /**
     * Runs the UMI attachment for every sample in the input directory and,
     * only if all samples succeeded, stamps {@code UMIattachCompleted} for the
     * given sequencing id.
     *
     * @throws Exception if the arguments are incomplete, a directory or file
     *         cannot be read or created, any sample task fails, or the
     *         database update fails
     */
    static public void main(String[] args)throws Exception {
        if (args.length < 4){
            throw new IllegalArgumentException(
                    "usage: UMI_attach <input fastq dir> <rt barcode file> <output dir> <seq id>");
        }

        // read the barcodes file, one barcode per line; blank lines are ignored
        TreeSet<String> barcodes = new TreeSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(args[1]))){
            String line;
            while ((line = reader.readLine()) != null){
                String barcode = line.trim();
                if (!barcode.isEmpty()){
                    barcodes.add(barcode);
                }
            }
        }
        
        // form the list of samples: the part of each file name before the first '.'
        File[] inFiles = new File(args[0]).listFiles();
        if (inFiles == null){
            throw new IOException("Cannot list input fastq directory " + args[0]);
        }
        TreeSet<String> samples = new TreeSet<>();
        for (File file : inFiles){
            String name = file.getName();
            int dot = name.indexOf('.');
            // Skip sub-directories, names without an extension, and hidden
            // files (leading '.'), which would yield an empty sample name.
            if (file.isFile() && dot > 0){
                samples.add(name.substring(0,dot));
            }
        }
        
        // make the UMI attached directory
        File umiDir = new File(args[2]);
        if (!umiDir.mkdirs() && !umiDir.isDirectory()){
            throw new IOException("Cannot create output directory " + umiDir);
        }
        
        List<UMI_attach> tasks = new ArrayList<>();
        for (String sample : samples){
            tasks.add(new UMI_attach(sample,inFiles,barcodes,umiDir));
        }
        ForkJoinPool pool = new ForkJoinPool();
        try {
            // invokeAll captures task failures in the futures; get() rethrows
            // them so a failed sample prevents the completion stamp below.
            List<Future<Object>> results = pool.invokeAll(tasks);
            for (Future<Object> result : results){
                result.get();
            }
        } finally {
            pool.shutdown();
        }
        
        PreparedStatement state = MySql.getMySql().getStatement("update sciSequencing set UMIattachCompleted = ? where SequencingID = ?");
        state.setTimestamp(1, new Timestamp(System.currentTimeMillis()));
        state.setString(2, args[3]);
        state.execute();
    }

}
