from gfpvar.tools.common import *
import glob
import os

def _replace_in_basename(path, old, new):
    """Return path with old replaced by new in its final component only.

    Directory components are left untouched so that a matching substring in
    a parent directory name cannot redirect the derived path.
    """
    head, tail = os.path.split(path)
    return os.path.join(head, tail.replace(old, new))


def main():
    """Write and submit one paired-end combine script per read-1 alignment.

    For every file matching ``<DATA_DIR>/seq/Sample4*/1_align/lane*read-1.aln``
    the sample and lane are parsed from the path, the matching read-2
    alignment and the two original read files are located, and a one-line
    shell script running ``bwa sampe | samtools view | samtools sort`` is
    written into the sample's ``1_align`` directory and handed to
    ``submitjob``.

    Lanes for which a file starting with the output prefix already exists are
    skipped, as are lanes whose read-1 sequence file cannot be found.
    """
    REF_FILE = "%s/seq/ref/S288c-ref.fasta" % DATA_DIR
    # Placeholders, in order:
    #   ref read1_align read2_align read1_fasta read2_fasta outfile
    # NOTE(review): per the samtools manual, -F 0x04 drops unmapped reads and
    # -bSu reads SAM / writes uncompressed BAM; confirm against the installed
    # samtools version.
    COMBINE_CMD = (
        "bwa sampe %s %s %s %s %s"
        " | samtools view -bSu -F 0x04 -"
        " | samtools sort -m 536883412 - %s  "
    )
    ALNFILE_PATTERN = "%s/seq/Sample4*/1_align/lane*read-1.aln" % DATA_DIR

    for f in glob.glob(ALNFILE_PATTERN):  # for all read 1 alignment files
        sample = f.split("/")[-3]
        # File names look like ..../lane-L002_read-1.aln -> lane "L002".
        lane = f.split("-")[-2].split("_")[0]
        # Single pre-formatted argument: prints identically on Python 2 and 3.
        print("%s %s" % (sample, lane))

        outdir = "%s/seq/%s/1_align" % (DATA_DIR, sample)
        outfile = "%s/align_lane-%s_%s" % (outdir, lane, sample)
        # outfile is given to `samtools sort` as its last argument; the glob
        # below treats it as a prefix, so any existing file that starts with
        # it marks the lane as already processed.
        if glob.glob("%s*" % outfile):
            print("Skipping")
            continue

        # Generated alignments: read-2 sits next to read-1. Only the file
        # name is rewritten, never a directory component.
        aln1 = f
        aln2 = _replace_in_basename(f, "read-1", "read-2")

        # Original sequence files for the same sample and lane.
        read1_candidates = glob.glob(
            "%s/seq/%s/0_reads/*_%s_R1_*" % (DATA_DIR, sample, lane))
        if not read1_candidates:
            # Previously an IndexError here aborted every remaining lane.
            print("No read-1 sequence file found for %s %s; skipping"
                  % (sample, lane))
            continue
        seq1 = read1_candidates[0]
        seq2 = _replace_in_basename(seq1, "_R1_", "_R2_")

        scriptfile = "%s/sampe_%s_%s.sh" % (outdir, sample, lane)
        # The context manager closes the script even if the write fails.
        with open(scriptfile, 'w') as ofh:
            ofh.write(
                COMBINE_CMD % (REF_FILE, aln1, aln2, seq1, seq2, outfile)
                + "\n")

        # presumably `submitjob` is a site-specific job submission wrapper;
        # verify against the cluster setup.
        status = os.system("submitjob sh %s" % (scriptfile))
        if status != 0:
            print("Warning: submitjob returned status %s for %s"
                  % (status, scriptfile))

# Run the batch only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
