#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Dec  7 23:04:48 2020

@author: pdf


The idea of this file is to provide, in one central place, all of the folder
paths that are used by the script. There are quite a number of folders, so a
second function of this file is to pre-make the folders that are used for
output, since Python does not create missing directories on its own.








"""

import matplotlib.pyplot as plt

# Use the serif font family for figures and make Arial the serif face.
# Both keys are set in a single call; the original issued the identical
# 'font.serif' update twice.
plt.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Arial',
})


from matplotlib.backends.backend_pdf import PdfPages


import numpy as np

import pyfaidx
# Absolute, machine-specific path of the hg38 whole-genome FASTA file.
input_fasta = '/home/nzs2002/work/genome/Homo_sapiens_UCSC_hg38/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa'
# The FASTA is handed to pyfaidx at import time, so importing this module
# runs this call; scripts that import the module can use genome_fasta directly.
genome_fasta = pyfaidx.Fasta(input_fasta)




from maxentpy import maxent
from maxentpy.maxent import load_matrix5, load_matrix3
# maxentpy matrices, loaded once at import time (5 first, then 3) and exposed
# as module-level names.
# NOTE(review): presumably these are passed to maxentpy's scoring functions by
# the scripts that import this module -- confirm against the callers.
matrix5, matrix3 = load_matrix5(), load_matrix3()

import datetime







# Hex colour assigned to each category label.
color_dict = {
    'lncRNA': '#00B9F1',
    'Antisense': '#DA6FAB',
    'mRNA': '#0072BC',
    'Intergenic': '#F15A22',
    'Intronic': '#00A875',
    'Alternative': '#ECDE38',
}


# The same colours, looked up by a second set of key names.
# 'Alternative' has no entry here.
color_key_dict = {
    'lncRNA_middle_exon_ids': color_dict['lncRNA'],
    'antisense_transcript_set': color_dict['Antisense'],
    'pc_middle_exon_id_list': color_dict['mRNA'],
    'no_overlap_set': color_dict['Intergenic'],
    'intron_interior_set': color_dict['Intronic'],
}


















def make_dir_path(dir_path):
    """Create ``dir_path`` (and any missing parents) if nothing exists there.

    Parameters
    ----------
    dir_path : str
        Path of the directory that should exist.

    Returns
    -------
    None
        A message is printed only when the path had to be created. If
        something already exists at ``dir_path`` the call is silent and
        leaves it untouched.
    """
    import os

    # Anything already present at this location (directory or file) is left
    # alone, exactly as before.
    if os.path.exists(dir_path):
        return

    # exist_ok=True closes the gap between the check above and the creation:
    # if another process creates the directory in between, this no longer
    # raises FileExistsError.
    os.makedirs(dir_path, exist_ok=True)
    print("Created directory path:" , dir_path )


''' DEFINE PATHS TO CREATE FOR OUTPUTS '''

class exp_output_path_class():
    """Empty attribute holder for the experiment's paths.

    The constructor sets nothing; paths are attached to an instance as plain
    attributes after it has been created, and can be listed with ``vars()``.
    """

    def __init__(self):
        # Intentionally empty: attributes are added after construction.
        pass



#main project class declaration
# Single shared holder; every path defined below is attached to it as an
# attribute.
exp_output_path = exp_output_path_class()


###other_files
# NOTE(review): by its name this looks like a hisat2 index location rather
# than an output folder, yet the directory-creation loop at the bottom of the
# file passes it to make_dir_path like every other entry -- confirm that is
# intended.
exp_output_path.hisat2_index_std_chroms = '/home/nzs2002/work/genome/2bit/hg38_no_variant_chrom/hg_38_main_chroms'

# Alternative paths that were already disabled (apparently from other
# machines/users); kept for reference only and never executed:
#   exp_output_path.Tra2B_pre_align_index ='/home/pdf/work/human_genome/indexes/2bit/Tra2B_reporter_intron_2bit/Tra2B_reporter_intron_index_2bit'
#   exp_output_path.log_odds_framework_path = '/home/pineapple/repositories/exon_def/tools/log_odds_framework/'
#   exp_output_path.hg38_genome_fasta ='/home/pineapple/work/indexes/downloaded/Homo_sapiens_UCSC_hg38/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa'

# Same FASTA that is opened as genome_fasta near the top of this file. Reusing
# input_fasta (identical value) keeps the location written down only once.
exp_output_path.hg38_genome_fasta = input_fasta


# Location of the git repository; the directory-creation loop at the bottom
# of the file skips this entry.
exp_output_path.git_repo = '/home/nzs2002/repositories/exon_def/slim_ET/'




###project_files

# Root working folder of the project; the read, SAM and main-data folders are
# all built by appending to it.
exp_output_path.initial_fastq_main_files = '/home/nzs2002/work/slim_ET/'
#exp_output_path.initial_fastq_main_files = '/home/nzs2002/work/slim_ET/ETF_20210210/'

#prepare_reads_for_alignment
exp_output_path.initial_fastq_input_files = exp_output_path.initial_fastq_main_files + 'reads/'

exp_output_path.trimmed_fastq_input_files = exp_output_path.initial_fastq_main_files + 'reads/trimmed/'
# NOTE(review): this attribute used to be assigned twice in a row ('SAM/' and
# then 'SAM/md5/'). Only the second value was ever visible to other code, so
# that is the one kept. If 'SAM/md5/' was meant to live under a separate
# attribute name, split it out here.
exp_output_path.trimmed_fastq_SAM_files = exp_output_path.initial_fastq_main_files + 'SAM/md5/'

#exp_output_path.trimmed_fastq_input_files = '/mnt/0862DDC375402D9E/sequencing_data/20201127/test_python_tar_extract/gzip/' + 'trimmed/'
#align_processed_exon_trapping_data


#build_exon_interval_from_simple_paired_data
exp_output_path.trimmed_fastq_input_files_figures = exp_output_path.trimmed_fastq_input_files + 'figures/'


#main data location

# Everything in this section lives under <project root>/main_data/.
_exons_root = exp_output_path.initial_fastq_main_files + 'main_data/'
exp_output_path.exons_data = _exons_root

# Plot folders below main_data/plots/.
_plots_root = _exons_root + 'plots/'
exp_output_path.exons_data_merge_plots = _plots_root
exp_output_path.exons_data_merge_plots_weblog_3ss = _plots_root + 'weblogo/3ss/aggregate/'
exp_output_path.exons_data_merge_plots_weblog_5ss = _plots_root + 'weblogo/5ss/aggregate/'
exp_output_path.exons_data_merge_dinucleotide_box_plots = _plots_root + 'dinucleotide_box_plots/'
exp_output_path.exons_data_merge_minor_location_histogram = _plots_root + 'minor_location_histogram/'
exp_output_path.exons_data_merge_score_histograms = _plots_root + 'score_histograms/'
exp_output_path.dfam_plots = _exons_root + 'dfam/plots/'


# GENCODE folder with its text and plot sub-folders.
# (path recorded in the original comment: /media/8TB_1/ETF_data/20200202/main_data/GENCODE)
_gencode_root = _exons_root + 'GENCODE/'
exp_output_path.GENCODE_dir = _gencode_root
exp_output_path.GENCODE_text = _gencode_root + 'text/'
exp_output_path.GENCODE_plot = _gencode_root + 'plot/'


exp_output_path.exons_data_merge_exp_an_un_plots = _plots_root + 'expression/an_vs_un/'

exp_output_path.main_database_dir = _exons_root + 'sqlite3/'

# Pickle folders. The root is written out in full rather than derived from
# initial_fastq_main_files.
_pickle_root = '/home/nzs2002/work/slim_ET/pickles/ETF_20210210/'
exp_output_path.pickle_main = _pickle_root
exp_output_path.pickle_individual = _pickle_root + 'libraries/'
exp_output_path.pickle_pre_merge = _pickle_root + 'libraries/pre_merge/'
exp_output_path.pickle_merged = _pickle_root + 'merged/'
exp_output_path.SpliceAI_ET_exons = _pickle_root + 'SpliceAI_ET_exons/'
exp_output_path.Dfam_pickle = _pickle_root + 'Dfam_pickle/'


# BED output files are kept below the pickle root as well.
exp_output_path.out_bed_main = _pickle_root + 'bed_output_files/'




# Scratch folder; written out in full rather than derived from another path.
exp_output_path.ssd_tmp  = '/home/nzs2002/work/slim_ET/ssd_scratch/'

# One folder per analysis, all directly below exons_data_merge_plots. In each
# case the folder name matches the attribute name.
# Parse_ENSEMBLE_GENCODE_exons_load and Compute_spliceAI_exon_id_scratch were
# previously assigned twice with identical values; each now appears once, at
# the position of its first assignment, so attribute order is unchanged.
exp_output_path.reshape_sam_read_assignments = exp_output_path.exons_data_merge_plots + 'reshape_sam_read_assignments/'
exp_output_path.ET_spliceAI_venn_2 = exp_output_path.exons_data_merge_plots + "ET_spliceAI_venn_2/"
exp_output_path.spliceAI_chr_17_analysis = exp_output_path.exons_data_merge_plots + "spliceAI_chr_17_analysis/"
exp_output_path.load_merged_aggregate_exon_dict = exp_output_path.exons_data_merge_plots + "load_merged_aggregate_exon_dict/"
exp_output_path.Parse_ENSEMBLE_GENCODE_exons_load = exp_output_path.exons_data_merge_plots + "Parse_ENSEMBLE_GENCODE_exons_load/"
exp_output_path.exon_number_expectation = exp_output_path.exons_data_merge_plots + "exon_number_expectation/"
exp_output_path.library_specifics = exp_output_path.exons_data_merge_plots + "library_specifics/"
exp_output_path.calculate_chromosome_coverage = exp_output_path.exons_data_merge_plots + "calculate_chromosome_coverage/"
exp_output_path.Dfam_repeats = exp_output_path.exons_data_merge_plots + "Dfam_repeats/"
exp_output_path.HEXEvent = exp_output_path.exons_data_merge_plots + "HEXEvent/"
exp_output_path.exon_id_conservations = exp_output_path.exons_data_merge_plots + "exon_id_conservations/"
exp_output_path.weblogo = exp_output_path.exons_data_merge_plots + "weblogo/"
exp_output_path.Parse_ENSEMBLE_GENCODE_exons_load_figures_2 = exp_output_path.exons_data_merge_plots + "Parse_ENSEMBLE_GENCODE_exons_load_figures_2/"
exp_output_path.ESE_scan = exp_output_path.exons_data_merge_plots + "ESE_scan/"
exp_output_path.rewrite_exon_number_expectation = exp_output_path.exons_data_merge_plots + "rewrite_exon_number_expectation/"
exp_output_path.Compute_spliceAI_exon_id_scratch = exp_output_path.exons_data_merge_plots + "Compute_spliceAI_exon_id_scratch/"
exp_output_path.proportion_bases_different_annotations = exp_output_path.exons_data_merge_plots + "proportion_bases_different_annotations/"
exp_output_path.load_merged_aggregate_exon_dict_figures_2 = exp_output_path.exons_data_merge_plots + "load_merged_aggregate_exon_dict_figures_2/"
exp_output_path.region_scores = exp_output_path.exons_data_merge_plots + "region_scores/"
exp_output_path.libraries_per_exon = exp_output_path.exons_data_merge_plots + "libraries_per_exon/"
exp_output_path.recovery_ratio_lncRNA_pc = exp_output_path.exons_data_merge_plots + "recovery_ratio_lncRNA_pc/"
exp_output_path.snaptron_exon = exp_output_path.exons_data_merge_plots + "snaptron_exon/"

















# Walk every attribute stored on exp_output_path, in the order it was
# assigned, and make sure the corresponding path exists on disk.
a = vars(exp_output_path)
for path in a:
    # Entries are skipped by value: anything equal to the git_repo string is
    # never passed to make_dir_path.
    # NOTE(review): every other entry is treated as a directory, including
    # hisat2_index_std_chroms and hg38_genome_fasta, which by their names look
    # like inputs rather than output folders -- confirm that is intended.
    if a[path] != exp_output_path.git_repo:
        make_dir_path(a[path])


print('Finished loading experiment paths')







''' DEFINE PATHS TO OTHER FILES '''


