library(binom)
library(ggplot2)
library(ggthemes)
# Load the IES table (whitespace-separated, first line holds the column names).
# Arguments are spelled out in full: `header` rather than the partially matched
# `h`, and TRUE/FALSE rather than the reassignable T/F.
IES_tab <- read.table(' path to IES table "Supplemental Table S2" ', header = TRUE, stringsAsFactors = FALSE)

#*******************************************************************************************************************************************
#********* binomial test for retention scores comparison and MA plot for visualization of differentially spliced IES 32F1 vs 25 F0 *********
#*******************************************************************************************************************************************
# Keep only IESs covered by at least 20 reads (IES+ plus IES-) in both the
# 25F0 and the 32F1 samples. which() keeps only the rows where the test is
# TRUE, so a row with a missing count is dropped instead of being returned as
# an all-NA row (which is what an NA in a logical row index produces).
IES_tab <- IES_tab[which((IES_tab$IES_PLUS_25F0 + IES_tab$IES_MINUS_25F0) >= 20
                         & (IES_tab$IES_PLUS_32F1 + IES_tab$IES_MINUS_32F1) >= 20), ]

# Exact binomial 75% confidence interval of the 25F0 retention score, from the
# IES+ read count (x) and the total IES+ plus IES- read count (n).
# The interval is computed once and both bounds are read from the same result
# instead of running the identical exact computation twice.
ref_ci_25F0 <- binom.confint(x = na.omit(IES_tab$IES_PLUS_25F0),
                             n = (na.omit(IES_tab$IES_PLUS_25F0)
                                  + na.omit(IES_tab$IES_MINUS_25F0)),
                             methods = "exact", conf.level = 0.75)

REF_upper <- ref_ci_25F0$upper
REF_lower <- ref_ci_25F0$lower


#REF_upper <- as.numeric(as.character(na.omit(IES_tab$IRS_25F0)))

# Assemble the per-IES summary table for the 32F1 vs 25F0 comparison; the
# adjusted p-values and the significance flag are appended to it later.
# All pieces are bound column-wise, converted to a data frame and labelled.
RF <- local({
  has_id <- !is.na(IES_tab$IES_ID)
  plus_25F0 <- na.omit(IES_tab$IES_PLUS_25F0)
  plus_32F1 <- na.omit(IES_tab$IES_PLUS_32F1)

  info <- cbind(
    as.character(na.omit(IES_tab$IES_ID)),
    REF_lower,
    as.character(na.omit(IES_tab$IRS_25F0)),
    REF_upper,
    na.omit(IES_tab$IRS_32F1),
    plus_25F0,
    plus_32F1,
    plus_25F0 + na.omit(IES_tab$IES_MINUS_25F0),  # total reads, 25F0
    plus_32F1 + na.omit(IES_tab$IES_MINUS_32F1),  # total reads, 32F1
    as.character(na.omit(IES_tab$IES_LOCATION)),
    as.numeric(as.character(na.omit(IES_tab$IRS_25F1))),
    as.numeric(as.character(na.omit(IES_tab$IRS_18F1))),
    as.numeric(as.character(IES_tab$exprM[has_id])),
    as.character(IES_tab$GENE_ID[has_id]),
    as.numeric(as.character(IES_tab$IES_LEN[has_id])),
    as.character(IES_tab$PTC_IS_DOWNSTREAM[has_id]),
    as.character(IES_tab$PTC_INDUCED[has_id]),
    IES_tab$PTC_INDUCED_DIST_TO_CDS_START[has_id],
    IES_tab$GENE_LEN[has_id]
  )

  info <- as.data.frame(info)
  colnames(info) <- c(
    "IES_ID", "REF_lower", "IRS_25F0", "REF_upper", "IRS_32F1",
    "IES_PLUS_25F0", "IES_PLUS_32F1", "Total_25F0", "Total32F1",
    "IES_LOCATION", "IRS_25F1", "IRS_18F1", "exprM", "GENE_ID", "IES_LEN",
    "PTC_IS_DOWNSTREAM", "PTC_INDUCED", "PTC_INDUCED_DIST_TO_CDS_START",
    "GENE_LEN"
  )
  info
})


#*******************************************************************************************************************************
## Test upward IRS transitions, IRS against upper bound
#*******************************************************************************************************************************
# Input of the binomial test, one row per entry: the expected proportion
# (upper bound of the 75% C.I. of the 25F0 reference), then the IES+ reads and
# the total reads (IES+ plus IES-) of the 32F1 sample being tested.
binomtable <- cbind(
  REF_upper,
  na.omit(IES_tab$IES_PLUS_32F1),
  na.omit(IES_tab$IES_PLUS_32F1) + na.omit(IES_tab$IES_MINUS_32F1)
)
#colnames(binomtable) <- c("Expected", "Success", "Trials")

# Perform a binomial test where appropriate.
#
# tab: vector holding, in this order, the expected proportion, the number of
#      IES+ reads (successes) and the total number of reads (trials).
# Returns the one-sided binom.test() p-value (alternative = "greater"), or NA
# when any of the three values is missing or when there are no reads at all.
get_Freq_Test_Pval <- function(tab) {
  expected <- tab[1]
  successes <- tab[2]
  trials <- tab[3]
  # A missing count would otherwise make the zero-read test below evaluate to
  # NA, which `if` rejects with an error.
  if (is.na(expected) || is.na(successes) || is.na(trials)) {
    return(NA)
  }
  if (successes == 0 && trials == 0) {
    return(NA)
  }
  binom.test(x = successes, n = trials, p = expected,
             alternative = "greater")$p.value
}

# Apply the function to every row of the binomial table, then correct the
# p-values for multiple testing (Benjamini-Hochberg).
pvalues <- apply(binomtable, 1, get_Freq_Test_Pval)
RF$padj <- p.adjust(pvalues, method = "BH")
# An IES is flagged when its adjusted p-value is below 0.05 and both samples
# have at least 20 reads.
RF$SIGNIFICANT <- (RF$padj < 0.05 & !is.na(RF$padj)
                   & (na.omit(IES_tab$IES_PLUS_25F0) + na.omit(IES_tab$IES_MINUS_25F0) >= 20)
                   & (na.omit(IES_tab$IES_PLUS_32F1) + na.omit(IES_tab$IES_MINUS_32F1) >= 20))

# Number of significant IESs
sum(RF$SIGNIFICANT)
# Full rows of the significantly more retained IESs
SIGN32_dist <- as.data.frame(RF[RF$SIGNIFICANT, ])
# IDs of the significantly more retained IESs, tagged with the temperature and
# the direction (Set = 1: upward). The table is built in one step with columns
# sized to the number of hits, so it is also valid when no IES is significant
# (assigning a constant column to a zero-row data frame is an error).
SIGN32 <- local({
  ids <- RF$IES_ID[RF$SIGNIFICANT]
  data.frame(IES_ID = ids,
             Temp = rep("32", length(ids)),
             Set = rep(1, length(ids)),
             stringsAsFactors = FALSE)
})

# Numeric copy of the columns needed for the MA plot (RF is built through
# cbind(), so its score columns are converted back to numbers here).
# Columns are named explicitly instead of through positional indices into RF.
TAB <- data.frame(IES_ID = as.character(RF$IES_ID),
                  REF_lower = as.numeric(as.character(RF$REF_lower)),
                  IRS_25F0 = as.numeric(as.character(RF$IRS_25F0)),
                  REF_upper = as.numeric(as.character(RF$REF_upper)),
                  IRS_32F1 = as.numeric(as.character(RF$IRS_32F1)),
                  SIGNIFICANT = as.character(RF$SIGNIFICANT),
                  stringsAsFactors = FALSE)

# Control value each IES is compared against:
#   32F1 score above the upper C.I. bound   -> upper bound
#   32F1 score below the lower C.I. bound   -> lower bound
#   32F1 score equal to the 25F0 score      -> 25F0 score
#   32F1 score strictly inside the interval -> 0 (its log2 is -Inf)
# The column starts as NA, so a row matching none of the cases (missing score,
# or a score exactly on a bound) stays NA instead of inheriting a value left
# over from another row. which() skips rows whose comparison is NA.
TAB$Control <- local({
  f1 <- TAB$IRS_32F1
  f0 <- TAB$IRS_25F0
  increased <- f1 > f0
  decreased <- f1 < f0

  above_ci <- which(increased & f1 > TAB$REF_upper)
  below_ci <- which(decreased & f1 < TAB$REF_lower)
  unchanged <- which(f1 == f0)
  within_ci <- which((increased & f1 < TAB$REF_upper)
                     | (decreased & f1 > TAB$REF_lower))

  control <- rep(NA_real_, nrow(TAB))
  control[above_ci] <- TAB$REF_upper[above_ci]
  control[below_ci] <- TAB$REF_lower[below_ci]
  control[unchanged] <- f0[unchanged]
  control[within_ci] <- 0
  control
})

# MA (Bland-Altman) style plot of the 32F1 vs 25F0 comparison.
# x: log2 of the control value; y: log2 of the 32F1 score over the control.
# Every IES is drawn in grey, then the IESs flagged SIGNIFICANT are overdrawn.
par(mar = c(5.1, 6.1, 4.1, 2.1))
x <- log2(as.numeric(as.character(TAB$Control)))
y <- log2(as.numeric(as.character(TAB$IRS_32F1)) / as.numeric(as.character(TAB$Control)))
plot(y ~ x,
     xlim = c(-6, 1), ylim = c(-6, 6),
     pch = 16, cex = 0.8, col = "#C0C0C0",
     xlab = expression("IRS"[F0]), ylab = "",
     axes = FALSE,
     cex.lab = 2.0, cex.axis = 1)

# Axes are drawn by hand: x ticks sit at log2 positions and are labelled with
# the back-transformed value, y ticks are labelled with the log2 ratio.
ex <- seq(-6, 0, 1)
ex_lab <- round(2^ex, digits = 2)
axis(1, at = ex, labels = ex_lab, cex.axis = 1.6, line = -0.5)
why <- seq(-6, 6, 1)
why_lab <- -6:6
axis(2, at = why, labels = why_lab, cex.axis = 1.6, line = 0)
title(ylab = expression("IRS"[F1/F0]*" "*"(Log"[2]*")"), line = 2.5, cex.lab = 2.0)

# Coordinates of the significant IESs are the matching entries of x and y
x1 <- x[TAB$SIGNIFICANT == "TRUE"]
y1 <- y[TAB$SIGNIFICANT == "TRUE"]

points(y1 ~ x1, pch = 16, cex = 0.8, col = "fire brick")
abline(h = 0, lty = 2, lwd = 3, col = "black")

#*******************************************************************************************************************************
## Test the opposite direction (downward IRS transitions, IRS against lower bound)
#*******************************************************************************************************************************
# Input of the binomial test, one row per entry: the expected proportion
# (lower bound of the 75% C.I. of the 25F0 reference), then the IES+ reads and
# the total reads (IES+ plus IES-) of the 32F1 sample being tested.
binomtable <- cbind(
  REF_lower,
  na.omit(IES_tab$IES_PLUS_32F1),
  na.omit(IES_tab$IES_PLUS_32F1) + na.omit(IES_tab$IES_MINUS_32F1)
)
#colnames(binomtable) <- c("Expected", "Success", "Trials")

# Perform a binomial test where appropriate.
#
# tab: vector holding, in this order, the expected proportion, the number of
#      IES+ reads (successes) and the total number of reads (trials).
# Returns the one-sided binom.test() p-value (alternative = "less"), or NA
# when any of the three values is missing or when there are no reads at all.
get_Freq_Test_Pval <- function(tab) {
  expected <- tab[1]
  successes <- tab[2]
  trials <- tab[3]
  # A missing count would otherwise make the zero-read test below evaluate to
  # NA, which `if` rejects with an error.
  if (is.na(expected) || is.na(successes) || is.na(trials)) {
    return(NA)
  }
  if (successes == 0 && trials == 0) {
    return(NA)
  }
  binom.test(x = successes, n = trials, p = expected,
             alternative = "less")$p.value
}

# Apply the function to every row of the binomial table, then correct the
# p-values for multiple testing (Benjamini-Hochberg).
pvalues <- apply(binomtable, 1, get_Freq_Test_Pval)
RF$padj <- p.adjust(pvalues, method = "BH")
# An IES is flagged when its adjusted p-value is below 0.05 and both samples
# have at least 20 reads.
RF$SIGNIFICANT <- (RF$padj < 0.05 & !is.na(RF$padj)
                   & (na.omit(IES_tab$IES_PLUS_25F0) + na.omit(IES_tab$IES_MINUS_25F0) >= 20)
                   & (na.omit(IES_tab$IES_PLUS_32F1) + na.omit(IES_tab$IES_MINUS_32F1) >= 20))

# Number of significant IESs
sum(RF$SIGNIFICANT)
# IDs of the IESs with a significant downward transition, tagged with the
# temperature and the direction (Set = -1: downward). The table is built in
# one step with columns sized to the number of hits, so it is also valid when
# no IES is significant (assigning a constant column to a zero-row data frame
# is an error).
SIGN32_d <- local({
  ids <- RF$IES_ID[RF$SIGNIFICANT]
  data.frame(IES_ID = ids,
             Temp = rep("32", length(ids)),
             Set = rep(-1, length(ids)),
             stringsAsFactors = FALSE)
})

# Numeric copy of the columns needed for the MA plot (RF is built through
# cbind(), so its score columns are converted back to numbers here).
# Columns are named explicitly instead of through positional indices into RF.
TAB <- data.frame(IES_ID = as.character(RF$IES_ID),
                  REF_lower = as.numeric(as.character(RF$REF_lower)),
                  IRS_25F0 = as.numeric(as.character(RF$IRS_25F0)),
                  REF_upper = as.numeric(as.character(RF$REF_upper)),
                  IRS_32F1 = as.numeric(as.character(RF$IRS_32F1)),
                  SIGNIFICANT = as.character(RF$SIGNIFICANT),
                  stringsAsFactors = FALSE)

# Control value each IES is compared against:
#   32F1 score above the upper C.I. bound   -> upper bound
#   32F1 score below the lower C.I. bound   -> lower bound
#   32F1 score equal to the 25F0 score      -> 25F0 score
#   32F1 score strictly inside the interval -> 0 (its log2 is -Inf)
# The column starts as NA, so a row matching none of the cases (missing score,
# or a score exactly on a bound) stays NA instead of inheriting a value left
# over from another row. which() skips rows whose comparison is NA.
TAB$Control <- local({
  f1 <- TAB$IRS_32F1
  f0 <- TAB$IRS_25F0
  increased <- f1 > f0
  decreased <- f1 < f0

  above_ci <- which(increased & f1 > TAB$REF_upper)
  below_ci <- which(decreased & f1 < TAB$REF_lower)
  unchanged <- which(f1 == f0)
  within_ci <- which((increased & f1 < TAB$REF_upper)
                     | (decreased & f1 > TAB$REF_lower))

  control <- rep(NA_real_, nrow(TAB))
  control[above_ci] <- TAB$REF_upper[above_ci]
  control[below_ci] <- TAB$REF_lower[below_ci]
  control[unchanged] <- f0[unchanged]
  control[within_ci] <- 0
  control
})

# Overlay on the current MA plot: IESs flagged SIGNIFICANT in the downward
# test, drawn in orange (x: log2 control, y: log2 of 32F1 score over control).
x1 <- log2(as.numeric(as.character(TAB$Control)))[TAB$SIGNIFICANT == "TRUE"]
y1 <- log2(as.numeric(as.character(TAB$IRS_32F1))
           / as.numeric(as.character(TAB$Control)))[TAB$SIGNIFICANT == "TRUE"]

points(y1 ~ x1, pch = 16, cex = 0.8, col = "orange")
abline(h = 0, lty = 2, lwd = 3, col = "black")

#*******************************************************************************************************************************************
#********* binomial test for retention scores comparison and MA plot for visualization of differentially spliced IES 18F1 vs 25 F0 *********
#*******************************************************************************************************************************************

# Reference (25F0) sample: x = IES+ reads and n = IES+ plus IES- reads are
# used to compute the exact binomial 75% confidence interval of the retention
# score. The interval is computed once and both bounds are read from the same
# result instead of running the identical exact computation twice.
ref_ci_25F0 <- binom.confint(x = na.omit(IES_tab$IES_PLUS_25F0),
                             n = (na.omit(IES_tab$IES_PLUS_25F0)
                                  + na.omit(IES_tab$IES_MINUS_25F0)),
                             methods = "exact", conf.level = 0.75)

REF_upper <- ref_ci_25F0$upper
REF_lower <- ref_ci_25F0$lower

# Build the per-IES summary table for the 18F1 vs 25F0 comparison; the
# adjusted p-values and the significance flag are added to it later.
# The 32F1 score is read from IRS_32F1, spelled out in full: the shortened
# name IRS_32F only resolved through `$` partial matching, which returns NULL
# as soon as another column starts with the same prefix.
RF <- cbind(as.character(na.omit(IES_tab$IES_ID)),
            REF_lower,
            as.character(na.omit(IES_tab$IRS_25F0)),
            REF_upper,
            na.omit(IES_tab$IRS_18F1),
            na.omit(IES_tab$IES_PLUS_25F0),
            na.omit(IES_tab$IES_PLUS_18F1),
            (na.omit(IES_tab$IES_PLUS_25F0) + na.omit(IES_tab$IES_MINUS_25F0)),
            (na.omit(IES_tab$IES_PLUS_18F1) + na.omit(IES_tab$IES_MINUS_18F1)),
            as.character(na.omit(IES_tab$IES_LOCATION)),
            as.numeric(as.character(na.omit(IES_tab$IRS_25F1))),
            as.numeric(as.character(na.omit(IES_tab$IRS_32F1))),
            as.numeric(as.character((IES_tab$exprM[!is.na(IES_tab$IES_ID)]))),
            as.character((IES_tab$GENE_ID[!is.na(IES_tab$IES_ID)])),
            as.numeric(as.character((IES_tab$IES_LEN[!is.na(IES_tab$IES_ID)]))),
            as.character((IES_tab$PTC_IS_DOWNSTREAM[!is.na(IES_tab$IES_ID)])),
            as.character((IES_tab$PTC_INDUCED[!is.na(IES_tab$IES_ID)])),
            IES_tab$PTC_INDUCED_DIST_TO_CDS_START[!is.na(IES_tab$IES_ID)],
            IES_tab$GENE_LEN[!is.na(IES_tab$IES_ID)])

RF <- as.data.frame(RF)

colnames(RF) <- c("IES_ID",
                  "REF_lower",
                  "IRS_25F0",
                  "REF_upper",
                  "IRS_18F1",
                  "IES_PLUS_25F0",
                  "IES_PLUS_18F1",
                  "Total_25F0",
                  "Total18F1",
                  "IES_LOCATION",
                  "IRS_25F1",
                  "IRS_32F1",
                  "exprM",
                  "GENE_ID",
                  "IES_LEN",
                  "PTC_IS_DOWNSTREAM",
                  "PTC_INDUCED",
                  "PTC_INDUCED_DIST_TO_CDS_START",
                  "GENE_LEN")


#*******************************************************************************************************************************
## Test upward IRS transitions, IRS against upper bound
#*******************************************************************************************************************************
# Input of the binomial test, one row per entry: the expected proportion
# (upper bound of the 75% C.I. of the 25F0 reference), then the IES+ reads and
# the total reads (IES+ plus IES-) of the 18F1 sample being tested.
binomtable <- cbind(
  REF_upper,
  na.omit(IES_tab$IES_PLUS_18F1),
  na.omit(IES_tab$IES_PLUS_18F1) + na.omit(IES_tab$IES_MINUS_18F1)
)
#colnames(binomtable) <- c("Expected", "Success", "Trials")

# Perform a binomial test where appropriate.
#
# tab: vector holding, in this order, the expected proportion, the number of
#      IES+ reads (successes) and the total number of reads (trials).
# Returns the one-sided binom.test() p-value (alternative = "greater"), or NA
# when any of the three values is missing or when there are no reads at all.
get_Freq_Test_Pval <- function(tab) {
  expected <- tab[1]
  successes <- tab[2]
  trials <- tab[3]
  # A missing count would otherwise make the zero-read test below evaluate to
  # NA, which `if` rejects with an error.
  if (is.na(expected) || is.na(successes) || is.na(trials)) {
    return(NA)
  }
  if (successes == 0 && trials == 0) {
    return(NA)
  }
  binom.test(x = successes, n = trials, p = expected,
             alternative = "greater")$p.value
}

# Apply the function to every row of the binomial table, then correct the
# p-values for multiple testing (Benjamini-Hochberg).
pvalues <- apply(binomtable, 1, get_Freq_Test_Pval)
RF$padj <- p.adjust(pvalues, method = "BH")
# An IES is flagged when its adjusted p-value is below 0.05 and both samples
# have at least 20 reads.
RF$SIGNIFICANT <- (RF$padj < 0.05 & !is.na(RF$padj)
                   & (na.omit(IES_tab$IES_PLUS_25F0) + na.omit(IES_tab$IES_MINUS_25F0) >= 20)
                   & (na.omit(IES_tab$IES_PLUS_18F1) + na.omit(IES_tab$IES_MINUS_18F1) >= 20))

# Number of significant IESs
sum(RF$SIGNIFICANT)
# Full rows of the significantly more retained IESs
SIGN18_dist <- as.data.frame(RF[RF$SIGNIFICANT, ])
# IDs of the significantly more retained IESs, tagged with the temperature and
# the direction (Set = 1: upward). The table is built in one step with columns
# sized to the number of hits, so it is also valid when no IES is significant
# (assigning a constant column to a zero-row data frame is an error).
SIGN18 <- local({
  ids <- RF$IES_ID[RF$SIGNIFICANT]
  data.frame(IES_ID = ids,
             Temp = rep("18", length(ids)),
             Set = rep(1, length(ids)),
             stringsAsFactors = FALSE)
})

# Numeric copy of the columns needed for the MA plot (RF is built through
# cbind(), so its score columns are converted back to numbers here).
# Columns are named explicitly instead of through positional indices into RF.
TAB <- data.frame(IES_ID = as.character(RF$IES_ID),
                  REF_lower = as.numeric(as.character(RF$REF_lower)),
                  IRS_25F0 = as.numeric(as.character(RF$IRS_25F0)),
                  REF_upper = as.numeric(as.character(RF$REF_upper)),
                  IRS_18F1 = as.numeric(as.character(RF$IRS_18F1)),
                  SIGNIFICANT = as.character(RF$SIGNIFICANT),
                  stringsAsFactors = FALSE)

# Control value each IES is compared against:
#   18F1 score above the upper C.I. bound   -> upper bound
#   18F1 score below the lower C.I. bound   -> lower bound
#   18F1 score equal to the 25F0 score      -> 25F0 score
#   18F1 score strictly inside the interval -> 0 (its log2 is -Inf)
# The column starts as NA, so a row matching none of the cases (missing score,
# or a score exactly on a bound) stays NA instead of inheriting a value left
# over from another row. which() skips rows whose comparison is NA.
TAB$Control <- local({
  f1 <- TAB$IRS_18F1
  f0 <- TAB$IRS_25F0
  increased <- f1 > f0
  decreased <- f1 < f0

  above_ci <- which(increased & f1 > TAB$REF_upper)
  below_ci <- which(decreased & f1 < TAB$REF_lower)
  unchanged <- which(f1 == f0)
  within_ci <- which((increased & f1 < TAB$REF_upper)
                     | (decreased & f1 > TAB$REF_lower))

  control <- rep(NA_real_, nrow(TAB))
  control[above_ci] <- TAB$REF_upper[above_ci]
  control[below_ci] <- TAB$REF_lower[below_ci]
  control[unchanged] <- f0[unchanged]
  control[within_ci] <- 0
  control
})

# MA (Bland-Altman) style plot of the 18F1 vs 25F0 comparison.
# x: log2 of the control value; y: log2 of the 18F1 score over the control.
# Every IES is drawn in grey, then the IESs flagged SIGNIFICANT are overdrawn.
par(mar = c(5.1, 6.1, 4.1, 2.1))
x <- log2(as.numeric(as.character(TAB$Control)))
y <- log2(as.numeric(as.character(TAB$IRS_18F1)) / as.numeric(as.character(TAB$Control)))
plot(y ~ x,
     xlim = c(-6, 1), ylim = c(-6, 6),
     pch = 16, cex = 0.8, col = "#C0C0C0",
     xlab = expression("IRS"[F0]), ylab = "",
     axes = FALSE,
     cex.lab = 2.0, cex.axis = 1)

# Axes are drawn by hand: x ticks sit at log2 positions and are labelled with
# the back-transformed value, y ticks are labelled with the log2 ratio.
ex <- seq(-6, 0, 1)
ex_lab <- round(2^ex, digits = 2)
axis(1, at = ex, labels = ex_lab, cex.axis = 1.6, line = -0.5)
why <- -6:6
why_lab <- -6:6
axis(2, at = why, labels = why_lab, cex.axis = 1.6, line = 0)
title(ylab = expression("IRS"[F1/F0]*" "*"(Log"[2]*")"), line = 2.3, cex.lab = 2.0)

# Coordinates of the significant IESs are the matching entries of x and y
x1 <- x[TAB$SIGNIFICANT == "TRUE"]
y1 <- y[TAB$SIGNIFICANT == "TRUE"]

points(y1 ~ x1, pch = 16, cex = 0.8, col = "sea green")
abline(h = 0, lty = 2, lwd = 3, col = "black")

#*******************************************************************************************************************************
## Test the opposite direction (downward IRS transitions, IRS against lower bound)
#*******************************************************************************************************************************
# Input of the binomial test, one row per entry: the expected proportion
# (lower bound of the 75% C.I. of the 25F0 reference), then the IES+ reads and
# the total reads (IES+ plus IES-) of the 18F1 sample being tested.
binomtable <- cbind(
  REF_lower,
  na.omit(IES_tab$IES_PLUS_18F1),
  na.omit(IES_tab$IES_PLUS_18F1) + na.omit(IES_tab$IES_MINUS_18F1)
)
#colnames(binomtable) <- c("Expected", "Success", "Trials")

# Perform a binomial test where appropriate.
#
# tab: vector holding, in this order, the expected proportion, the number of
#      IES+ reads (successes) and the total number of reads (trials).
# Returns the one-sided binom.test() p-value (alternative = "less"), or NA
# when any of the three values is missing or when there are no reads at all.
get_Freq_Test_Pval <- function(tab) {
  expected <- tab[1]
  successes <- tab[2]
  trials <- tab[3]
  # A missing count would otherwise make the zero-read test below evaluate to
  # NA, which `if` rejects with an error.
  if (is.na(expected) || is.na(successes) || is.na(trials)) {
    return(NA)
  }
  if (successes == 0 && trials == 0) {
    return(NA)
  }
  binom.test(x = successes, n = trials, p = expected,
             alternative = "less")$p.value
}

# Apply the function to every row of the binomial table, then correct the
# p-values for multiple testing (Benjamini-Hochberg).
pvalues <- apply(binomtable, 1, get_Freq_Test_Pval)
RF$padj <- p.adjust(pvalues, method = "BH")
# An IES is flagged when its adjusted p-value is below 0.05 and both samples
# have at least 20 reads.
RF$SIGNIFICANT <- (RF$padj < 0.05 & !is.na(RF$padj)
                   & (na.omit(IES_tab$IES_PLUS_25F0) + na.omit(IES_tab$IES_MINUS_25F0) >= 20)
                   & (na.omit(IES_tab$IES_PLUS_18F1) + na.omit(IES_tab$IES_MINUS_18F1) >= 20))

# Number of significant IESs
sum(RF$SIGNIFICANT)
# IDs of the IESs with a significant downward transition, tagged with the
# temperature and the direction (Set = -1: downward). The table is built in
# one step with columns sized to the number of hits, so it is also valid when
# no IES is significant (assigning a constant column to a zero-row data frame
# is an error).
SIGN18_d <- local({
  ids <- RF$IES_ID[RF$SIGNIFICANT]
  data.frame(IES_ID = ids,
             Temp = rep("18", length(ids)),
             Set = rep(-1, length(ids)),
             stringsAsFactors = FALSE)
})

# Numeric copy of the columns needed for the MA plot (RF is built through
# cbind(), so its score columns are converted back to numbers here).
# Columns are named explicitly instead of through positional indices into RF.
TAB <- data.frame(IES_ID = as.character(RF$IES_ID),
                  REF_lower = as.numeric(as.character(RF$REF_lower)),
                  IRS_25F0 = as.numeric(as.character(RF$IRS_25F0)),
                  REF_upper = as.numeric(as.character(RF$REF_upper)),
                  IRS_18F1 = as.numeric(as.character(RF$IRS_18F1)),
                  SIGNIFICANT = as.character(RF$SIGNIFICANT),
                  stringsAsFactors = FALSE)

# Control value each IES is compared against:
#   18F1 score above the upper C.I. bound   -> upper bound
#   18F1 score below the lower C.I. bound   -> lower bound
#   18F1 score equal to the 25F0 score      -> 25F0 score
#   18F1 score strictly inside the interval -> 0 (its log2 is -Inf)
# The column starts as NA, so a row matching none of the cases (missing score,
# or a score exactly on a bound) stays NA instead of inheriting a value left
# over from another row. which() skips rows whose comparison is NA.
TAB$Control <- local({
  f1 <- TAB$IRS_18F1
  f0 <- TAB$IRS_25F0
  increased <- f1 > f0
  decreased <- f1 < f0

  above_ci <- which(increased & f1 > TAB$REF_upper)
  below_ci <- which(decreased & f1 < TAB$REF_lower)
  unchanged <- which(f1 == f0)
  within_ci <- which((increased & f1 < TAB$REF_upper)
                     | (decreased & f1 > TAB$REF_lower))

  control <- rep(NA_real_, nrow(TAB))
  control[above_ci] <- TAB$REF_upper[above_ci]
  control[below_ci] <- TAB$REF_lower[below_ci]
  control[unchanged] <- f0[unchanged]
  control[within_ci] <- 0
  control
})

# Overlay on the current MA plot: IESs flagged SIGNIFICANT in the downward
# test (x: log2 control, y: log2 of 18F1 score over control).
x1 <- log2(as.numeric(as.character(TAB$Control)))[TAB$SIGNIFICANT == "TRUE"]
y1 <- log2(as.numeric(as.character(TAB$IRS_18F1))
           / as.numeric(as.character(TAB$Control)))[TAB$SIGNIFICANT == "TRUE"]

points(y1 ~ x1, pch = 16, cex = 0.8, col = "#00FA9A")
abline(h = 0, lty = 2, lwd = 3, col = "black")

#*******************************************************************************************************************************************
#********* binomial test for retention scores comparison and MA plot for visualization of differentially spliced IES 25F1 vs 25 F0 *********
#*******************************************************************************************************************************************
# Reference (25F0) sample: x = IES+ reads and n = IES+ plus IES- reads are
# used to compute the exact binomial 75% confidence interval of the retention
# score. The interval is computed once and both bounds are read from the same
# result instead of running the identical exact computation twice.
ref_ci_25F0 <- binom.confint(x = na.omit(IES_tab$IES_PLUS_25F0),
                             n = (na.omit(IES_tab$IES_PLUS_25F0)
                                  + na.omit(IES_tab$IES_MINUS_25F0)),
                             methods = "exact", conf.level = 0.75)

REF_upper <- ref_ci_25F0$upper
REF_lower <- ref_ci_25F0$lower

# Build the per-IES summary table for the 25F1 vs 25F0 comparison; the
# adjusted p-values and the significance flag are added to it later.
# The 32F1 score is read from IRS_32F1, spelled out in full: the shortened
# name IRS_32F only resolved through `$` partial matching, which returns NULL
# as soon as another column starts with the same prefix.
RF <- cbind(as.character(na.omit(IES_tab$IES_ID)),
            REF_lower,
            as.character(na.omit(IES_tab$IRS_25F0)),
            REF_upper,
            na.omit(IES_tab$IRS_25F1),
            na.omit(IES_tab$IES_PLUS_25F0),
            na.omit(IES_tab$IES_PLUS_25F1),
            (na.omit(IES_tab$IES_PLUS_25F0) + na.omit(IES_tab$IES_MINUS_25F0)),
            (na.omit(IES_tab$IES_PLUS_25F1) + na.omit(IES_tab$IES_MINUS_25F1)),
            as.character(na.omit(IES_tab$IES_LOCATION)),
            as.numeric(as.character(na.omit(IES_tab$IRS_18F1))),
            as.numeric(as.character(na.omit(IES_tab$IRS_32F1))),
            as.numeric(as.character((IES_tab$exprM[!is.na(IES_tab$IES_ID)]))),
            as.character((IES_tab$GENE_ID[!is.na(IES_tab$IES_ID)])),
            as.numeric(as.character((IES_tab$IES_LEN[!is.na(IES_tab$IES_ID)]))),
            as.character((IES_tab$PTC_IS_DOWNSTREAM[!is.na(IES_tab$IES_ID)])),
            as.character((IES_tab$PTC_INDUCED[!is.na(IES_tab$IES_ID)])),
            IES_tab$PTC_INDUCED_DIST_TO_CDS_START[!is.na(IES_tab$IES_ID)],
            IES_tab$GENE_LEN[!is.na(IES_tab$IES_ID)])

RF <- as.data.frame(RF)

colnames(RF) <- c("IES_ID",
                  "REF_lower",
                  "IRS_25F0",
                  "REF_upper",
                  "IRS_25F1",
                  "IES_PLUS_25F0",
                  "IES_PLUS_25F1",
                  "Total_25F0",
                  "Total25F1",
                  "IES_LOCATION",
                  "IRS_18F1",
                  "IRS_32F1",
                  "exprM",
                  "GENE_ID",
                  "IES_LEN",
                  "PTC_IS_DOWNSTREAM",
                  "PTC_INDUCED",
                  "PTC_INDUCED_DIST_TO_CDS_START",
                  "GENE_LEN")

#*******************************************************************************************************************************
## Test upward IRS transitions, IRS against upper bound
#*******************************************************************************************************************************
# Input of the binomial test, one row per entry: the expected proportion
# (upper bound of the 75% C.I. of the 25F0 reference), then the IES+ reads and
# the total reads (IES+ plus IES-) of the 25F1 sample being tested.
binomtable <- cbind(
  REF_upper,
  na.omit(IES_tab$IES_PLUS_25F1),
  na.omit(IES_tab$IES_PLUS_25F1) + na.omit(IES_tab$IES_MINUS_25F1)
)

# Perform a binomial test where appropriate.
#
# tab: vector holding, in this order, the expected proportion, the number of
#      IES+ reads (successes) and the total number of reads (trials).
# Returns the one-sided binom.test() p-value (alternative = "greater"), or NA
# when any of the three values is missing or when there are no reads at all.
get_Freq_Test_Pval <- function(tab) {
  expected <- tab[1]
  successes <- tab[2]
  trials <- tab[3]
  # A missing count would otherwise make the zero-read test below evaluate to
  # NA, which `if` rejects with an error.
  if (is.na(expected) || is.na(successes) || is.na(trials)) {
    return(NA)
  }
  if (successes == 0 && trials == 0) {
    return(NA)
  }
  binom.test(x = successes, n = trials, p = expected,
             alternative = "greater")$p.value
}

# Apply the function to every row of the binomial table, then correct the
# p-values for multiple testing (Benjamini-Hochberg).
pvalues <- apply(binomtable, 1, get_Freq_Test_Pval)
RF$padj <- p.adjust(pvalues, method = "BH")
# An IES is flagged when its adjusted p-value is below 0.05 and both samples
# have at least 20 reads.
RF$SIGNIFICANT <- (RF$padj < 0.05 & !is.na(RF$padj)
                   & (na.omit(IES_tab$IES_PLUS_25F0) + na.omit(IES_tab$IES_MINUS_25F0) >= 20)
                   & (na.omit(IES_tab$IES_PLUS_25F1) + na.omit(IES_tab$IES_MINUS_25F1) >= 20))

# Number of significant IESs
sum(RF$SIGNIFICANT)
# Full rows of the significantly more retained IESs
SIGN25_dist <- as.data.frame(RF[RF$SIGNIFICANT, ])
# IDs of the significantly more retained IESs, tagged with the temperature and
# the direction (Set = 1: upward). The table is built in one step with columns
# sized to the number of hits, so it is also valid when no IES is significant
# (assigning a constant column to a zero-row data frame is an error).
SIGN25 <- local({
  ids <- RF$IES_ID[RF$SIGNIFICANT]
  data.frame(IES_ID = ids,
             Temp = rep("25", length(ids)),
             Set = rep(1, length(ids)),
             stringsAsFactors = FALSE)
})

# Numeric copy of the columns needed for the MA plot (RF is built through
# cbind(), so its score columns are converted back to numbers here).
# Columns are named explicitly instead of through positional indices into RF.
TAB <- data.frame(IES_ID = as.character(RF$IES_ID),
                  REF_lower = as.numeric(as.character(RF$REF_lower)),
                  IRS_25F0 = as.numeric(as.character(RF$IRS_25F0)),
                  REF_upper = as.numeric(as.character(RF$REF_upper)),
                  IRS_25F1 = as.numeric(as.character(RF$IRS_25F1)),
                  SIGNIFICANT = as.character(RF$SIGNIFICANT),
                  stringsAsFactors = FALSE)

# Control value each IES is compared against:
#   25F1 score above the upper C.I. bound   -> upper bound
#   25F1 score below the lower C.I. bound   -> lower bound
#   25F1 score equal to the 25F0 score      -> 25F0 score
#   25F1 score strictly inside the interval -> 0 (its log2 is -Inf)
# The column starts as NA, so a row matching none of the cases (missing score,
# or a score exactly on a bound) stays NA instead of inheriting a value left
# over from another row. which() skips rows whose comparison is NA.
TAB$Control <- local({
  f1 <- TAB$IRS_25F1
  f0 <- TAB$IRS_25F0
  increased <- f1 > f0
  decreased <- f1 < f0

  above_ci <- which(increased & f1 > TAB$REF_upper)
  below_ci <- which(decreased & f1 < TAB$REF_lower)
  unchanged <- which(f1 == f0)
  within_ci <- which((increased & f1 < TAB$REF_upper)
                     | (decreased & f1 > TAB$REF_lower))

  control <- rep(NA_real_, nrow(TAB))
  control[above_ci] <- TAB$REF_upper[above_ci]
  control[below_ci] <- TAB$REF_lower[below_ci]
  control[unchanged] <- f0[unchanged]
  control[within_ci] <- 0
  control
})

# MA (Bland-Altman) style plot of the 25F1 vs 25F0 comparison.
# x: log2 of the control value; y: log2 of the 25F1 score over the control.
# Every IES is drawn in grey, then the IESs flagged SIGNIFICANT are overdrawn.
par(mar = c(5.1, 6.1, 4.1, 2.1))
x <- log2(as.numeric(as.character(TAB$Control)))
y <- log2(as.numeric(as.character(TAB$IRS_25F1)) / as.numeric(as.character(TAB$Control)))
plot(y ~ x,
     xlim = c(-6, 1), ylim = c(-6, 6),
     pch = 16, cex = 0.8, col = "#C0C0C0",
     xlab = expression("IRS"[F0]), ylab = "",
     axes = FALSE,
     cex.lab = 2.0, cex.axis = 1)

# Axes are drawn by hand: x ticks sit at log2 positions and are labelled with
# the back-transformed value, y ticks are labelled with the log2 ratio.
ex <- seq(-6, 0, 1)
ex_lab <- round(2^ex, digits = 2)
axis(1, at = ex, labels = ex_lab, cex.axis = 1.6, line = -0.5)
why <- -6:6
why_lab <- -6:6
axis(2, at = why, labels = why_lab, cex.axis = 1.6, line = 0)
title(ylab = expression("IRS"[F1/F0]*" "*"(Log"[2]*")"), line = 2.3, cex.lab = 2.0)

# Coordinates of the significant IESs are the matching entries of x and y
x1 <- x[TAB$SIGNIFICANT == "TRUE"]
y1 <- y[TAB$SIGNIFICANT == "TRUE"]

points(y1 ~ x1, pch = 16, cex = 0.8, col = "dark blue")
abline(h = 0, lty = 2, lwd = 3, col = "black")

#*******************************************************************************************************************************
## Test the opposite direction (downward IRS transitions, IRS against lower bound)
#*******************************************************************************************************************************
# Input of the binomial test, one row per entry: the expected proportion
# (lower bound of the 75% C.I. of the 25F0 reference), then the IES+ reads and
# the total reads (IES+ plus IES-) of the 25F1 sample being tested.
binomtable <- cbind(
  REF_lower,
  na.omit(IES_tab$IES_PLUS_25F1),
  na.omit(IES_tab$IES_PLUS_25F1) + na.omit(IES_tab$IES_MINUS_25F1)
)

# Perform a binomial test where appropriate.
#
# tab: vector holding, in this order, the expected proportion, the number of
#      IES+ reads (successes) and the total number of reads (trials).
# Returns the one-sided binom.test() p-value (alternative = "less"), or NA
# when any of the three values is missing or when there are no reads at all.
get_Freq_Test_Pval <- function(tab) {
  expected <- tab[1]
  successes <- tab[2]
  trials <- tab[3]
  # A missing count would otherwise make the zero-read test below evaluate to
  # NA, which `if` rejects with an error.
  if (is.na(expected) || is.na(successes) || is.na(trials)) {
    return(NA)
  }
  if (successes == 0 && trials == 0) {
    return(NA)
  }
  binom.test(x = successes, n = trials, p = expected,
             alternative = "less")$p.value
}

# Apply the function to every row of the binomial table, then correct the
# p-values for multiple testing (Benjamini-Hochberg).
pvalues <- apply(binomtable, 1, get_Freq_Test_Pval)
RF$padj <- p.adjust(pvalues, method = "BH")
# An IES is flagged when its adjusted p-value is below 0.05 and both samples
# have at least 20 reads.
RF$SIGNIFICANT <- (RF$padj < 0.05 & !is.na(RF$padj)
                   & (na.omit(IES_tab$IES_PLUS_25F0) + na.omit(IES_tab$IES_MINUS_25F0) >= 20)
                   & (na.omit(IES_tab$IES_PLUS_25F1) + na.omit(IES_tab$IES_MINUS_25F1) >= 20))

# Number of significant IESs
sum(RF$SIGNIFICANT)
# IDs of the IESs with a significant downward transition, tagged with the
# temperature and the direction (Set = -1: downward). The table is built in
# one step with columns sized to the number of hits, so it is also valid when
# no IES is significant (assigning a constant column to a zero-row data frame
# is an error).
SIGN25_d <- local({
  ids <- RF$IES_ID[RF$SIGNIFICANT]
  data.frame(IES_ID = ids,
             Temp = rep("25", length(ids)),
             Set = rep(-1, length(ids)),
             stringsAsFactors = FALSE)
})

# Numeric copy of the columns needed for the MA plot (RF is built through
# cbind(), so its score columns are converted back to numbers here).
# Columns are named explicitly instead of through positional indices into RF.
TAB <- data.frame(IES_ID = as.character(RF$IES_ID),
                  REF_lower = as.numeric(as.character(RF$REF_lower)),
                  IRS_25F0 = as.numeric(as.character(RF$IRS_25F0)),
                  REF_upper = as.numeric(as.character(RF$REF_upper)),
                  IRS_25F1 = as.numeric(as.character(RF$IRS_25F1)),
                  SIGNIFICANT = as.character(RF$SIGNIFICANT),
                  stringsAsFactors = FALSE)

# Control value each IES is compared against:
#   25F1 score above the upper C.I. bound   -> upper bound
#   25F1 score below the lower C.I. bound   -> lower bound
#   25F1 score equal to the 25F0 score      -> 25F0 score
#   25F1 score strictly inside the interval -> 0 (its log2 is -Inf)
# The column starts as NA, so a row matching none of the cases (missing score,
# or a score exactly on a bound) stays NA instead of inheriting a value left
# over from another row. which() skips rows whose comparison is NA.
TAB$Control <- local({
  f1 <- TAB$IRS_25F1
  f0 <- TAB$IRS_25F0
  increased <- f1 > f0
  decreased <- f1 < f0

  above_ci <- which(increased & f1 > TAB$REF_upper)
  below_ci <- which(decreased & f1 < TAB$REF_lower)
  unchanged <- which(f1 == f0)
  within_ci <- which((increased & f1 < TAB$REF_upper)
                     | (decreased & f1 > TAB$REF_lower))

  control <- rep(NA_real_, nrow(TAB))
  control[above_ci] <- TAB$REF_upper[above_ci]
  control[below_ci] <- TAB$REF_lower[below_ci]
  control[unchanged] <- f0[unchanged]
  control[within_ci] <- 0
  control
})

# Overlay on the current MA plot: IESs flagged SIGNIFICANT in the downward
# test (x: log2 control, y: log2 of 25F1 score over control).
x1 <- log2(as.numeric(as.character(TAB$Control)))[TAB$SIGNIFICANT == "TRUE"]
y1 <- log2(as.numeric(as.character(TAB$IRS_25F1))
           / as.numeric(as.character(TAB$Control)))[TAB$SIGNIFICANT == "TRUE"]

points(y1 ~ x1, pch = 16, cex = 0.8, col = "#00BFFF")
abline(h = 0, lty = 2, lwd = 3, col = "black")

#******************************************************************************
#************************** Summary Bar Chart *********************************
#******************************************************************************
# Pool the per-temperature tables of significant IESs: upward transitions
# (Set = 1) go to `test`, downward transitions (Set = -1) to `test1`, and both
# are stacked in `combo_test`.
test <- do.call(rbind, list(SIGN18, SIGN25, SIGN32))
test1 <- do.call(rbind, list(SIGN18_d, SIGN25_d, SIGN32_d))
combo_test <- do.call(rbind, list(test, test1))

## Simple bar chart: number of significant upward IESs per temperature
g <- ggplot(data = test, mapping = aes(x = Temp))
g +
  geom_bar(mapping = aes(fill = Temp), width = 0.5) +
  scale_fill_brewer(palette = "Spectral") +
  theme_tufte() +
  theme(axis.text.x = element_text(angle = 0, vjust = 0.3)) +
  expand_limits(y = c(0, 300)) +
  labs(title = "Categorywise Bar Chart",
       subtitle = "Epigenetic Control",
       caption = "Source: IESs in putative promoters",
       y = "SIGN IESs (count)")

## Back-to-back bar chart: upward counts above zero, downward counts below
# Axis breaks every 50 from -300 to 300, labelled with their absolute value
brks <- seq(-300, 300, 50)
lbls <- as.character(abs(brks))

ggplot(data = combo_test, mapping = aes(x = Temp, y = Set, fill = Temp)) +
  # Bars stack the +1 / -1 entries of Set for each temperature
  geom_bar(stat = "identity", width = 0.85) +
  scale_y_continuous(breaks = brks, labels = lbls,
                     limits = c(-100, 300), position = "left") +
  scale_fill_manual(values = c(rgb(46/256, 139/256, 87/256, 0.7),
                               rgb(0, 0, 139/256, 0.7),
                               rgb(178/256, 34/256, 34/256, 0.7))) +
  geom_line(aes(0, 0)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 1) +
  theme_tufte() +  # Tufte theme from ggthemes
  labs(title = "Significant IRS Transitions",
       y = expression("IES count"*" "*"(p"[adj]*" "*"<"*" "*"0.05"*")"))

