#!/prg/Modules/language/python/2.7

import os
import re
# Cutadapt filtering
# Filters for TOT
# Parameters based on McManus et al. 2014, except for the maximum total RNA size

########################## TOT ##########################################
# cutadapt parameters for TOT
cut_m = 27    # passed to cutadapt as -m (minimum read length)
cut_M = 40    # passed to cutadapt as -M (maximum read length)
cut_e = 0.12  # passed to cutadapt as -e (maximum error rate)
# Adapter sequences tried previously, kept for reference:
#cut_a="AGATCGGAAGAGCACACGTCT" # -a because the adapter is on the 3' end
#"TCACGTTCTAGCATTCAAGGTCCCCTGTAGGCACCATCAATAGATCGGAA"
cut_a = "CTGTAGGCACCATCAAT"  # passed to cutadapt as -a (3' adapter)

# Input reads: library name -> raw FASTQ file
dico_link = {
    "TOT_Cer1": "../../../../00data_seq/mcmannus2014/reads/SRA/S_cer_mRNA_RiboProf_matched_Rep1/SRR948551.fastq",
    "RPF_Cer1": "../../../../00data_seq/mcmannus2014/reads/SRA/S_cer_Ribo-seq_matched_Rep1/SRR948553.fastq",
    "TOT_Cer2": "../../../../00data_seq/mcmannus2014/reads/SRA/S_cer_mRNA_RiboProf_matched_Rep2/SRR948552.fastq",
    "RPF_Cer2": "../../../../00data_seq/mcmannus2014/reads/SRA/S_cer_Ribo-seq_matched_Rep2/SRR948555.fastq",
}

# Directory receiving the trimmed reads and the cutadapt logs
dir_results = "../../13_TE_Cer/01_cutadapt/"

# Sorted iteration keeps the processing order identical from run to run.
for lib in sorted(dico_link):
    dir_fastq = dico_link[lib]

    # Trimmed reads and log sit side by side in dir_results. The directory
    # prefix is applied exactly once to each path.
    dirout = dir_results + lib + "_cutadapt.fastq"
    filelog = lib + "_cutadapt.log"
    dirlog = dir_results + filelog
    print(lib)

    # Build the cutadapt command: trimmed reads go to stdout (dirout),
    # the cutadapt report goes to stderr (dirlog).
    command_line = (
        "cutadapt -a " + cut_a
        + " -m " + str(cut_m)
        + " -M " + str(cut_M)
        + " -e " + str(cut_e)
        + " " + dir_fastq
        + " > " + dirout
        + " 2> " + dirlog
    )
    print(command_line)

    # Run the command; report a failure instead of silently ignoring it,
    # but keep going so the remaining libraries are still processed.
    status = os.system(command_line)
    if status != 0:
        print("WARNING: cutadapt failed for " + lib
              + " (exit status " + str(status) + "), see " + dirlog)





import os

# Paths for the bowtie index
dir_fasta = "../../../../00data_seq/cerevisiae/genomes/S288C.fasta"
# The index prefix is the FASTA path without its ".fasta" extension.
dir_db = dir_fasta.replace(".fasta", "")

# Genome indexing (disabled; re-enable to rebuild the index) ###
#command_db= "bowtie-build "+dir_fasta+" "+dir_db
#print command_db
#os.system(command_db)
#==============================================================

# Paths for the mapping step
# Input reads: library name -> adapter-trimmed FASTQ file
dico_link2 = {
    "TOT_Cer1": "../../13_TE_Cer/01_cutadapt/TOT_Cer1_cutadapt.fastq",
    "RPF_Cer1": "../../13_TE_Cer/01_cutadapt/RPF_Cer1_cutadapt.fastq",
    "TOT_Cer2": "../../13_TE_Cer/01_cutadapt/TOT_Cer2_cutadapt.fastq",
    "RPF_Cer2": "../../13_TE_Cer/01_cutadapt/RPF_Cer2_cutadapt.fastq",
}

# Directory receiving the SAM/BAM files and the bowtie logs
dir_results = "../../13_TE_Cer/02_mapping/"

# Sorted iteration keeps the processing order identical from run to run.
for lib in sorted(dico_link2):
    dir_fastq = dico_link2[lib]
    print(lib)

    # Output files, all derived from one common base path
    file_sam = lib + "onS288C.sam"
    file_log = file_sam + ".log"
    dir_sam = dir_results + file_sam
    dir_log = dir_results + file_log
    dir_bam = dir_results + lib + "onS288C.bam"
    dir_sort = dir_results + lib + "onS288C_sorted"

    # Mapping: bowtie writes SAM (-S) to stdout and its report to stderr.
    command_map = ("bowtie -S " + dir_db + " " + dir_fastq
                   + " -v 1 --chunkmbs 500 --best > " + dir_sam
                   + " 2> " + dir_log)
    # Convert SAM to BAM for downstream analysis.
    command_sb = "samtools view -bS " + dir_sam + " > " + dir_bam
    # Sort the BAM; dir_sort is an output prefix, so the sorted file is
    # dir_sort + ".bam".
    # NOTE(review): "samtools sort in.bam out_prefix" is the legacy syntax;
    # recent samtools releases expect "-o out.bam" -- confirm the installed
    # version before changing it.
    command_sort = "samtools sort " + dir_bam + "  " + dir_sort
    # Index the sorted BAM.
    command_index = ("samtools index " + dir_sort + ".bam "
                     + dir_sort + ".bam.bai")

    # Each step consumes the previous step's output, so stop this library's
    # chain at the first failure instead of running tools on a missing or
    # truncated file. The other libraries are still processed.
    for command in (command_map, command_sb, command_sort, command_index):
        print(command)
        status = os.system(command)
        if status != 0:
            print("WARNING: step failed for " + lib
                  + " (exit status " + str(status)
                  + "), skipping the remaining steps for this library")
            break

    # NOTE(review): the original comment here announced that the unsorted
    # SAM file is deleted to free disk space, but no removal is performed.

