#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import matplotlib.font_manager
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as stats
#get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


# Use Arial for every text element drawn by matplotlib in this script.
plt.rc("font", family="Arial")


# In[3]:


# Load the per-IES table from the supplemental Excel workbook.
# `sep` is an option of the delimited-text readers (read_csv/read_table), not
# of `read_excel`; passing it raises TypeError on current pandas, so it is
# intentionally not forwarded here.
ies = pd.read_excel("/path/to/Supplemental_Table_S2.xlsx")


# In[4]:


# Add one IES_COV_<sample> column per sample: the sum of that sample's
# IES_PLUS_<sample> and IES_MINUS_<sample> columns. These totals are what the
# ">= 20" filters further down are applied to.
coverage_samples = ('25F0', '25F1', '18F1', '32F1', '27_LA', '27_EV', '27_ND7', '27_AR')
for sample in coverage_samples:
    plus_counts = ies['IES_PLUS_' + sample]
    minus_counts = ies['IES_MINUS_' + sample]
    ies['IES_COV_' + sample] = plus_counts + minus_counts


# In[5]:


# Bin edges shared by every pd.cut call below.
# Start from steps of 0.1 running from -0.1 up to 1.0, then splice 0.05 in at
# position 2 so the first 0.1-wide positive bin is split in two. The result has
# 13 edges -> 12 bins: [-0.1, 0], (0, 0.05], (0.05, 0.1], (0.1, 0.2], ... up to 1.0.
tenth_step_edges = np.arange(-0.1, 1.05, 0.1)
edges = np.insert(tenth_step_edges, 2, 0.05)


# In[6]:


# Restrict to rows that carry an IES_ID, then replace every column whose name
# contains 'IRS_' by the bin each value falls into. Bins are labelled with
# their upper edge (edges[1:]); include_lowest=True makes the first bin closed
# on the left so the lowest edge itself is binned rather than becoming NaN.
ies_annotated = ies[ies.IES_ID.notna()]
irs_in_bins = ies_annotated.filter(regex = 'IRS_').apply(
    lambda scores : pd.cut(scores, edges, labels = edges[1:], include_lowest=True))


# In[7]:


# Re-attach the IES_ID column and the IES_COV_* coverage columns by row index
# so the binned scores can be filtered on coverage later.
irs_in_bins = pd.merge(irs_in_bins, ies_annotated.filter(regex='IES_ID$|COV'),
                       left_index=True, right_index=True)


# In[8]:


def _irs_class_fractions(sample, min_coverage = 20):
    """Return the fraction of rows falling in each IRS bin for one sample.

    Rows of ``irs_in_bins`` are kept only when their ``IES_COV_<sample>`` value
    is at least ``min_coverage``. The binned ``IRS_<sample>`` column of the
    remaining rows is counted per bin and divided by the total count, and the
    result is ordered by bin.

    Parameters
    ----------
    sample : str
        Sample suffix used in the column names, e.g. ``'18F1'`` or ``'27_AR'``.
    min_coverage : int, optional
        Minimum value of ``IES_COV_<sample>`` for a row to be counted.

    Returns
    -------
    pandas.Series
        Per-bin fractions, indexed by bin label and sorted by bin.
    """
    covered = irs_in_bins[irs_in_bins['IES_COV_' + sample] >= min_coverage]
    # normalize=True divides each bin count by the sum of all bin counts,
    # which is what the explicit "counts / counts.sum()" used to do.
    return covered['IRS_' + sample].value_counts(normalize = True).sort_index()


y_18_F1 = _irs_class_fractions('18F1')
y_32_F1 = _irs_class_fractions('32F1')


# In[9]:


y_25F0 = _irs_class_fractions('25F0')
y_25F1 = _irs_class_fractions('25F1')
y_27_ar = _irs_class_fractions('27_AR')
y_27_la = _irs_class_fractions('27_LA')
y_27_nd = _irs_class_fractions('27_ND7')
y_27_ev = _irs_class_fractions('27_EV')


# In[10]:


# Stack the six 25/27 degree samples into a (samples x bins) array and compute
# the per-bin mean and standard error across samples.
# A plain ndarray is used instead of np.mat: the matrix alias is deprecated and
# no longer exists in NumPy 2.0, and an ndarray yields 1-D results directly.
y_ctrl = np.array([np.asarray(fractions, dtype = float)
                   for fractions in (y_25F0, y_25F1, y_27_ar, y_27_ev, y_27_la, y_27_nd)])
y_ctrl_mean = y_ctrl.mean(axis = 0)
y_ctrl_se = stats.sem(y_ctrl, axis = 0)


# In[11]:


# Summary fractions for the pie chart, averaged over all eight samples.
# Positions refer to the sorted bins: position 0 is the lowest bin (upper edge
# 0), positions 3 and up are the bins above 0.1. `.iloc` makes the positional
# intent explicit instead of relying on how `series[int]` is resolved for a
# non-integer index.
all_sample_fractions = [y_18_F1, y_25F0, y_25F1, y_32_F1, y_27_ar, y_27_la, y_27_nd, y_27_ev]
complete = np.mean([fractions.iloc[0] for fractions in all_sample_fractions])
somatic = np.mean([fractions.iloc[3:].sum() for fractions in all_sample_fractions])
# Whatever is neither in the lowest bin nor above 0.1 (i.e. positions 1 and 2).
incomplete = 1 - (complete + somatic)


# In[12]:


# Read the four MILORD tables. All are tab-separated with a header row;
# low_memory=False makes pandas parse each file in one pass.
milord_read_options = dict(header = 0, sep = '\t', low_memory = False)
milord_25F0 = pd.read_csv("/path/to/MILORD_25F0.tab", **milord_read_options)
milord_25F1 = pd.read_csv("/path/to/MILORD_25F1.tab", **milord_read_options)
milord_18F1 = pd.read_csv("/path/to/MILORD_18F1.tab", **milord_read_options)
milord_32F1 = pd.read_csv("/path/to/MILORD_32F1.tab", **milord_read_options)


# In[13]:


def _bin_milord_scores(milord):
    """Bin the score columns of one MILORD table and re-attach ID/support columns.

    Rows without an ``ID`` are dropped. Every column whose name contains
    ``'score'`` is replaced by the bin (labelled with its upper edge, from the
    module-level ``edges``) each value falls into. The columns whose name
    contains ``'ID'`` or ``'support'`` are then merged back on the row index.

    Parameters
    ----------
    milord : pandas.DataFrame
        One table as read from a MILORD ``.tab`` file.

    Returns
    -------
    pandas.DataFrame
        Binned score columns plus the ID/support columns, same row index as the
        rows of ``milord`` that have an ``ID``.
    """
    with_id = milord[milord.ID.notna()]
    binned = with_id.filter(regex = 'score').apply(
        lambda scores : pd.cut(scores, edges, labels = edges[1:], include_lowest=True))
    return pd.merge(binned, with_id.filter(regex='ID|support'),
                    left_index=True, right_index=True)


# In[14]:


ds_in_bins_25F0 = _bin_milord_scores(milord_25F0)
ds_in_bins_25F1 = _bin_milord_scores(milord_25F1)
ds_in_bins_18F1 = _bin_milord_scores(milord_18F1)
ds_in_bins_32F1 = _bin_milord_scores(milord_32F1)


# In[15]:


def _ds_class_counts(ds_in_bins):
    """Count rows per deletion_score bin among sufficiently supported rows.

    A row is kept when support_variant + support_ref is at least 20; the
    returned Series holds the number of kept rows per bin, ordered by bin.
    """
    total_support = ds_in_bins.support_variant + ds_in_bins.support_ref
    supported = ds_in_bins[total_support >= 20]
    return supported.deletion_score.value_counts().sort_index()


y_25F0_ds = _ds_class_counts(ds_in_bins_25F0)
y_25F1_ds = _ds_class_counts(ds_in_bins_25F1)
y_18F1_ds = _ds_class_counts(ds_in_bins_18F1)
y_32F1_ds = _ds_class_counts(ds_in_bins_32F1)


# In[16]:


# Figure 1: panel A (per-bin IRS fractions with an inset pie) and panel B
# (per-bin DS counts). All axes are placed by hand with add_axes, so a bare
# figure is created; the two axes plt.subplots used to create were never drawn
# on and only sat hidden underneath panel A.
# Panel B lies to the right of the nominal figure area (x from 1.25) and is
# captured on save through bbox_inches='tight'.
fig = plt.figure()

# Tick labels for the 12 bins; panel B skips the first ('0') bin.
class_labels = ['0','0-5','5-10','10-20','20-30','30-40','40-50','50-60','60-70',
                '70-80','80-90','90-100']

# subplot for IES retention (A)
# barplot: three bars per bin, offset by 0.21 so they sit side by side
x = np.arange(0, len(y_18_F1), 1)
ax1 = fig.add_axes([0, 0, 1, 1])
ax1.bar(x, height = y_ctrl_mean, color ='skyblue', label = '25/27°C', width = 0.2, yerr = y_ctrl_se)
ax1.bar(x+0.21, height = y_18_F1, width = 0.2, color = 'limegreen', label = '18°C')
ax1.bar(x+0.42, height = y_32_F1, width = 0.2, color = 'red', label = '32°C')
ax1.set_xticks(x+0.21)
ax1.set_xticklabels(class_labels, rotation = 45, fontsize = '16')
ax1.set_xlabel('IRS class (%)', fontsize = '16')
ax1.legend(loc = 'center right', fontsize = '16', frameon = False, handlelength = 0.7)
ax1.set_ylabel("Percent of Total IESs", fontsize = '16')
ax1.set_yscale('log')
# NOTE(review): these labels are hard-coded and are assigned to whatever major
# ticks the log locator produces; they presumably relabel fractions as percent.
# Confirm they still line up if the data range changes.
ax1.set_yticklabels(['0', '0', '0.1', '1', '10', '100'], fontsize = '14')
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.text(-0.1, 1.15, 'A', transform=ax1.transAxes, fontsize=18, fontweight='bold', va='top', ha='right')
ax1.set_title('Incompletely excised IESs', pad = 0, fontsize = '18', fontweight = 'bold')

# pie chart (inset in the upper right of panel A)
ax2 = fig.add_axes([0.75,0.65,0.3,0.3])
sizes = [complete, incomplete, somatic]
ax2.pie(sizes, startangle = 165, colors = ['lightgrey', 'grey', 'black'],
        labels = ['', '', ''], textprops={'size': 'smaller'}, labeldistance = 0.1)
ax2.text(-0.5, -0.5, 'IRS = 0', fontsize = '12')


# subplot for cryptic IESs (B)
# The first bin is left out of this panel, hence positions 1..n-1 and iloc[1:].
x2 = np.arange(1, len(y_25F0_ds), 1)
ax3 = fig.add_axes([1.25, 0, 1, 1])
ax3.spines['right'].set_visible(False)
ax3.spines['top'].set_visible(False)
# Same spot as before (x = 1.1 in panel A's axes coordinates), now expressed in
# panel B's own coordinates so the label follows panel B if it is moved.
ax3.text(-0.15, 1.15, 'B', transform=ax3.transAxes, fontsize=18, fontweight='bold', va='top', ha='right')
ax3.bar(x2, height = y_25F0_ds.iloc[1:], color ='blue', label = '25°C F0', width = 0.2)
ax3.bar(x2+0.19, height = y_25F1_ds.iloc[1:], width = 0.19, color = 'skyblue', label = '25°C F1')
ax3.bar(x2+0.38, height = y_18F1_ds.iloc[1:], width = 0.19, color = 'limegreen', label = '18°C F1')
ax3.bar(x2+0.57, height = y_32F1_ds.iloc[1:], width = 0.19, color = 'red', label = '32°C F1')
ax3.set_yscale('log')
ax3.set_xticks(x2+0.19)
ax3.set_xticklabels(class_labels[1:], rotation = 45, fontsize = '16')
ax3.set_xlabel('DS class (%)', fontsize = '16')
ax3.legend(loc = 'upper right', fontsize = '16', frameon = False, handlelength = 0.7)
ax3.set_ylabel("Number of cryptic IES excisions", fontsize = '16')
# Only the upper limit is set: a lower limit of 0 is invalid on a log axis and
# matplotlib would discard it with a warning, keeping the current lower limit.
ax3.set_ylim(top = 10000)
# NOTE(review): hard-coded labels on locator-chosen ticks, as in panel A.
ax3.set_yticklabels(['0', '0', '10', '100', '1000', '10000'], fontsize = '16')
ax3.set_title('Cryptic IESs', pad = 0, fontsize = '18', fontweight = 'bold')

fig.savefig("/path/to/save/Fig1.tiff", bbox_inches = 'tight', dpi = 600)




