from gfpvar.tools.common import *
import glob
import os

def main(data_dir=None):
    """Write and submit one ``bwa aln`` job script per lane and read.

    Every read-1 FASTQ file matching
    ``<data_dir>/seq/Sample4_*/0_reads/*L00?_R1_001.fastq.gz`` is paired with
    its read-2 counterpart (same file name with ``_R2_`` in place of the
    trailing ``_R1_``).  For each of the two reads a one-line shell script
    ``<data_dir>/seq/<sample>/1_align/align_<sample>-<lane>-<read>.sh`` is
    written and handed to ``submitjob sh <script>``.  A read whose
    ``lane-<lane>_read-<read>.aln`` output already exists is reported as
    done and skipped.

    Args:
        data_dir: Root data directory.  When ``None`` (the default) the
            module-level ``DATA_DIR`` is used.
    """
    if data_dir is None:
        # DATA_DIR is not defined in this file; presumably it is provided by
        # the star import from gfpvar.tools.common.
        data_dir = DATA_DIR

    aln_cmd = "bwa aln %s/seq/ref/S288c-ref.fasta %s > %s/lane-%s_read-%d.aln"
    read1_pattern = "%s/seq/Sample4_*/0_reads/*L00?_R1_001.fastq.gz" % data_dir

    for read1_path in glob.glob(read1_pattern):
        reads_dir, read1_name = os.path.split(read1_path)
        # Layout is <sample>/0_reads/<fastq>, so the sample is the directory
        # two levels above the file.
        sample = os.path.basename(os.path.dirname(reads_dir))
        outdir = "%s/seq/%s/1_align" % (data_dir, sample)

        # Parse the file name only, so underscores elsewhere in the path
        # cannot shift the lane field.
        lane = read1_name.split("_")[-3]
        # The glob pattern guarantees the name ends in "_R1_001.fastq.gz", so
        # splitting on the last "_R1_" always yields exactly two pieces.
        name_head, name_tail = read1_name.rsplit("_R1_", 1)

        # The job scripts (and the alignment output they redirect to) live in
        # outdir, so it has to exist before anything is written there.
        if not os.path.isdir(outdir):
            os.makedirs(outdir)

        for read in (1, 2):
            aln_file = "%s/lane-%s_read-%d.aln" % (outdir, lane, read)
            if os.path.exists(aln_file):
                print("Done %s %d" % (lane, read))
                continue

            # Swap only the read marker of the file name; the directory part
            # of the path is left untouched.
            read_name = "%s_R%d_%s" % (name_head, read, name_tail)
            read_path = os.path.join(reads_dir, read_name)

            scriptfile = "%s/align_%s-%s-%d.sh" % (outdir, sample, lane, read)
            with open(scriptfile, "w") as script:
                script.write(
                    aln_cmd % (data_dir, read_path, outdir, lane, read) + "\n"
                )

            status = os.system("submitjob sh %s" % scriptfile)
            if status != 0:
                # Keep going with the remaining reads, but do not hide the
                # failed submission.
                print("submitjob failed with status %d for %s"
                      % (status, scriptfile))

# Run the job generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
