##This simple script allows you to draw a Venn diagram 
##showing the overlap between elements across groups of your choice
##You can also inspect the elements shared for each combination as they are saved into vectors
# Load packages
library(gplots)
library(VennDiagram)
library(gridExtra)
library(purrr)
grid.newpage()

# Load Table
# Read the IES table. The first line is taken as the header and text columns
# are kept as character vectors rather than factors.
IES_tab <- read.table(' path to IES table "Supplemental Table S2" ',
                      header = TRUE, stringsAsFactors = FALSE)

# Keep only the rows for which the sum of the IES_PLUS_* and IES_MINUS_*
# columns is greater than min_coverage in every one of the four samples.
min_coverage <- 20
coverage_samples <- c("25F0", "25F1", "18F1", "32F1")

# Fail early with a clear message instead of silently ending up with an
# empty table when a column name does not match.
coverage_cols <- c(paste0("IES_PLUS_", coverage_samples),
                   paste0("IES_MINUS_", coverage_samples))
missing_cols <- setdiff(coverage_cols, names(IES_tab))
if (length(missing_cols) > 0) {
  stop("IES table lacks expected column(s): ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}

# One logical vector per sample, combined with an element-wise AND.
well_covered <- Reduce(`&`, lapply(coverage_samples, function(s) {
  IES_tab[[paste0("IES_PLUS_", s)]] + IES_tab[[paste0("IES_MINUS_", s)]] >
    min_coverage
}))

# which() drops rows whose coverage test is NA; indexing with the raw logical
# vector would insert an all-NA row for each of them instead.
IES_tab <- IES_tab[which(well_covered), ]

#****************************************************************************************
## In silico IES retention: theoretical overlap between samples (4-way shared)
## 1000 simulated retention profiles based on random draws from the PGM-set 
#****************************************************************************************
## Rando_set
grid.newpage()
# Per-sample set sizes used for the random draws: the number of rows that have
# a non-missing, first-occurrence IES_ID and an IRS value above 0.
# Mind the order: l = 25F0, l1 = 25F1, l2 = 18F1, l3 = 32F1.
usable_id <- !is.na(IES_tab$IES_ID) & !duplicated(IES_tab$IES_ID)

# Count the usable rows whose IRS is above 0.
# irs: numeric IRS column of IES_tab for one sample.
# Rows with a missing IRS are not counted (na.rm = TRUE). Taking the length of
# a logical subset would count them, because an NA index yields an NA element.
count_retained <- function(irs) {
  sum(usable_id & irs > 0, na.rm = TRUE)
}

l  <- count_retained(IES_tab$IRS_25F0)
l1 <- count_retained(IES_tab$IRS_25F1)
l2 <- count_retained(IES_tab$IRS_18F1)
l3 <- count_retained(IES_tab$IRS_32F1)

# Print the four set sizes
c(l, l1, l2, l3)
# Simulate the 4-way overlap expected by chance. Every trial draws four random
# ID sets (without replacement) whose sizes are the per-sample counts
# l, l1, l2, l3, and records the size of the intersection of all four sets.
# Call set.seed() before this section if the draws must be reproducible.
trials <- seq_len(1000)

# Candidate IDs, computed once rather than four times per trial. unique()
# guarantees that a single draw can never contain the same ID twice.
id_pool <- unique(as.character(na.omit(IES_tab$IES_ID)))

# Preallocated results, one slot per trial (no "" sentinel to strip later).
InterSec <- rep(NA_character_, length(trials))
Grand_Overlap_size <- integer(length(trials))

for (i in seq_along(trials)) {
  # Draw order follows the set labels used for the plot below:
  # 25F0, 25F1, 32F1, 18F1. l3 is the 32F1 count and l2 the 18F1 count, so
  # z takes l3 and w takes l2 to keep every set size under its own label.
  x <- sample(id_pool, l, replace = FALSE)
  y <- sample(id_pool, l1, replace = FALSE)
  z <- sample(id_pool, l3, replace = FALSE)
  w <- sample(id_pool, l2, replace = FALSE)

  d <- venn(list(x, y, z, w), show.plot = FALSE)

  # NOTE(review): this keeps the i-th element of the "A" intersection group
  # (presumably the IDs found only in the first set; gplots labels unnamed
  # sets A, B, ... - confirm), or NA when that group has fewer than i
  # elements. It is not the 4-way intersection; confirm this is intended.
  # [["A"]] is used instead of $A so that no partial name matching can occur.
  only_first <- attr(d, "intersections")[["A"]]
  if (i <= length(only_first)) {
    InterSec[i] <- only_first[i]
  }

  # Number of IDs shared by all four random sets in this trial.
  Grand_Overlap_size[i] <- length(Reduce(intersect, list(x, y, z, w)))
}

# Largest 4-way overlap observed in any trial
max(Grand_Overlap_size)
InterSec

# Bring the four random sets to a common length (shorter ones are padded with
# trailing NAs) so that they can be stored as the columns of one matrix.
n <- max(lengths(list(x, y, z, w)))
x <- `length<-`(x, n)
y <- `length<-`(y, n)
z <- `length<-`(z, n)
w <- `length<-`(w, n)

IES_list3 <- cbind(
  Rando_SET_25F0 = x,
  Rando_SET_25F1 = y,
  Rando_SET_32F1 = z,
  Rando_SET_18F1 = w
)

# Recover the individual sets from the matrix columns, without the NA padding
random_sets <- lapply(
  setNames(colnames(IES_list3), colnames(IES_list3)),
  function(set_name) na.omit(IES_list3[, set_name])
)

# Build the Venn diagram as a grid object (filename = NULL: nothing is saved)
vennplot3 <- venn.diagram(
  x = random_sets,
  filename = NULL,
  col = "transparent",
  alpha = 0.5,
  fill = c("dodgerblue", "goldenrod1", "light blue", "pink"),
  margin = 0.2
)

# Draw the Venn diagram
grid.draw(vennplot3)

# Draw it again with a caption underneath
grid.arrange(
  gTree(children = vennplot3),
  bottom = "Expected Size of Overlap Across Samples of miss-spliced IES (Random Set)",
  top = ""
)

# Compute the intersections so that their elements can be inspected
input3 <- unname(random_sets)
d <- venn(input3, show.plot = FALSE)
str(d)
inters3 <- attr(d, "intersections")


#****************************************************************************************
## Observed overlap between samples (4-way-shared)
#****************************************************************************************
## Full PGM set (IRS > 0)
grid.newpage()
# Build one character vector of IES IDs per sample: rows with a non-missing,
# first-occurrence IES_ID whose IRS value for that sample is above 0.
id_ok <- !is.na(IES_tab$IES_ID) & !duplicated(IES_tab$IES_ID)

# irs: IRS column of IES_tab for one sample; returns the selected IDs as text.
ids_with_irs <- function(irs) {
  as.character(IES_tab$IES_ID[id_ok & irs > 0])
}

x <- ids_with_irs(IES_tab$IRS_25F0)
y <- ids_with_irs(IES_tab$IRS_25F1)
z <- ids_with_irs(IES_tab$IRS_32F1)
w <- ids_with_irs(IES_tab$IRS_18F1)


# Bind the four observed sets into one matrix. The vectors differ in length,
# so the shorter ones are padded with trailing NAs first.
n <- max(length(x), length(y), length(z), length(w))
length(x) <- n
length(y) <- n
length(z) <- n
length(w) <- n

IES_list2 <- cbind(x, y, z, w)
colnames(IES_list2) <- c("PGM_SET_25F0", "PGM_SET_25F1", "PGM_SET_32F1", "PGM_SET_18F1")

# Strip the NAs once and reuse the cleaned sets for both the plot and the
# intersection table.
observed_sets <- list(
  PGM_SET_25F0 = na.omit(IES_list2[, 1]),
  PGM_SET_25F1 = na.omit(IES_list2[, 2]),
  PGM_SET_32F1 = na.omit(IES_list2[, 3]),
  PGM_SET_18F1 = na.omit(IES_list2[, 4])
)

# Build the diagram a single time (the earlier duplicate call was overwritten
# before it was ever drawn). filename = NULL returns a grid object instead of
# writing a file; the category labels are blanked out, and the argument is
# spelled out in full rather than relying on partial matching of `category`.
vennplot2 <- venn.diagram(
  x = observed_sets,
  filename = NULL,
  col = "transparent",
  alpha = 0.5,
  fill = c("dodgerblue", "light blue", "pink2", "dark green"),
  margin = 0.2,
  category.names = c("", "", "", "")
)

# Draw the Venn diagram
grid.draw(vennplot2)

# Draw it again with a caption underneath
grid.arrange(gTree(children = vennplot2),
             bottom = "Overlap of miss-spliced IES across samples",
             top = "")

# Now you can inspect the elements in each intersection.
# The venn object is stored as venn_obs rather than `c`, which would mask
# base::c() wherever it is passed around as a value.
input2 <- unname(observed_sets)
venn_obs <- venn(input2, show.plot = FALSE)

str(venn_obs)
inters2 <- attr(venn_obs, "intersections")
inters2
