library(ggplot2)
library(dplyr)
library(reshape2)

# NOTE(review): the working directory is hard-coded; every relative path used
# below is resolved against it.
setwd("~/cloud/for_yi/May11")

## Protein and cell types ----
# Input table; the code in this section reads the columns Protein, Cell_type,
# Method, Ovp, Conserved and Total.
prop_var_OCM <- read.table("./may11_prop_test.txt", header = TRUE)

# Pin the level order so that "Overlap" comes before "non_overlap"; the wide
# tables built by dcast() below rely on that column order when renaming.
prop_var_OCM$Ovp <- factor(
  prop_var_OCM$Ovp,
  levels = c("Overlap", "non_overlap")
)

## Method ----
# Keep only the rows produced by the selected method.
mymethod <- "Armatus"
prop_mymethod <- filter(prop_var_OCM, Method == mymethod)

# Wide table of conserved counts: one row per Protein / Cell_type / Method,
# one column per Ovp level (columns 4 and 5).
cons_dat <- dcast(
  prop_mymethod,
  Protein + Cell_type + Method ~ Ovp,
  value.var = "Conserved"
)
names(cons_dat)[4:5] <- c("cons_overp", "cons_notoverp")

# Same layout for the total counts.
tot_dat <- dcast(
  prop_mymethod,
  Protein + Cell_type + Method ~ Ovp,
  value.var = "Total"
)
names(tot_dat)[4:5] <- c("tot_overp", "tot_notoverp")

# Join conserved and total counts on the three identifying columns.
cons_tot <- merge(
  cons_dat,
  tot_dat,
  by = c("Protein", "Cell_type", "Method")
)
 
#' Significance label from a 2x2 chi-squared test on conserved counts
#'
#' Builds the 2x2 contingency table
#'
#'                 conserved        not conserved
#'   first group   cons_overp       tot_overp - cons_overp
#'   second group  cons_notoverp    tot_notoverp - cons_notoverp
#'
#' runs `chisq.test()` on it with default settings and converts the p-value
#' into a star label. The parameter names refer to overlap / non-overlap, but
#' the function only needs two groups with a conserved and a total count each.
#'
#' @param cons_overp Conserved count in the first group.
#' @param cons_notoverp Conserved count in the second group.
#' @param tot_overp Total count in the first group.
#' @param tot_notoverp Total count in the second group.
#' @return A single string: "NS" (p >= 0.05), "*" (0.01 <= p < 0.05),
#'   "**" (0.001 <= p < 0.01) or "***" (p < 0.001). `NA_character_` is
#'   returned when the test cannot be evaluated: a count is missing, the table
#'   is entirely zero, or the p-value itself is NA/NaN.
chisq_func <- function(cons_overp, cons_notoverp, tot_overp, tot_notoverp) {
  cells <- c(
    cons_overp, tot_overp - cons_overp,
    cons_notoverp, tot_notoverp - cons_notoverp
  )

  # chisq.test() stops on missing cells and on a table without observations;
  # report "no result" for that row instead of aborting the whole computation.
  if (anyNA(cells) || all(cells == 0)) {
    return(NA_character_)
  }

  # byrow = TRUE: each row is one group, columns are conserved / not conserved.
  my_mat <- matrix(
    cells,
    nrow = 2,
    byrow = TRUE,
    dimnames = list(
      c("Overlap", "notOverlap"),
      c("conserv", "notConserv")
    )
  )

  resp <- chisq.test(my_mat)$p.value

  # A zero margin yields a NaN p-value, which cannot be compared to a cutoff.
  if (is.na(resp)) {
    return(NA_character_)
  }

  # findInterval() uses left-closed intervals, so the cut points reproduce
  # p < 0.001, [0.001, 0.01), [0.01, 0.05) and p >= 0.05.
  star_labels <- c("***", "**", "*", "NS")
  star_labels[findInterval(resp, c(0.001, 0.01, 0.05)) + 1L]
}
# One significance label per row of cons_tot (Protein / Cell_type / Method),
# computed row by row because chisq_func() works on scalar counts.
# ungroup() drops the rowwise grouping once the labels are in place.
cons_tot_withStar <- cons_tot %>%
  rowwise() %>%
  mutate(
    star = chisq_func(cons_overp, cons_notoverp, tot_overp, tot_notoverp)
  ) %>%
  ungroup()

# Dodged bars of Prop per Protein, coloured by Ovp, one facet row per
# Cell_type.
# NOTE(review): limits set on the scale drop out-of-range values instead of
# clipping them; confirm that Prop never exceeds 0.8.
p <- ggplot(prop_mymethod) +
  facet_grid(Cell_type ~ .) +
  geom_bar(
    aes(x = Protein, y = Prop, fill = Ovp),
    stat = "Identity",
    position = "dodge"
  ) +
  scale_fill_manual(values = c("#E69F00", "#999999")) +
  scale_y_continuous(limits = c(0, 0.8))

# Add the star labels at a fixed height above the bars and style the plot.
# The title is built from `mymethod` so it always names the method that was
# actually used to filter the data.
p +
  geom_text(
    data = cons_tot_withStar,
    aes(x = Protein, y = 0.7, label = star),
    size = 7
  ) +
  labs(
    title = paste0(
      "TAD conserveness among regions of chromatin modification \n Method: ",
      mymethod
    ),
    x = "Protein",
    y = "Proportion of conserved TAD boundaries"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_line(color = "grey85"),
    # legend.position = c(0.8, 0.3),
    legend.background = element_rect(size = 0.5, linetype = "solid", colour = "black"),
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 12, face = "plain"),
    plot.title = element_text(size = 16, face = "bold", hjust = .5),
    axis.text.x = element_text(
      colour = "grey20", size = 13, angle = 0,
      hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title.x = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = 0, face = "bold"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = .5, face = "bold"
    )
  )

  
## Chromatin state ----
# Input table; the plot below reads the columns Chrom_state and Prop.
prop_chr_stat <- read.table("./chrom_state_512.txt", header = TRUE)

# Single bar per chromatin state showing Prop, y axis fixed to [0, 0.8].
# Only x and y are mapped, so the legend.* settings below have nothing to
# style in this plot.
ggplot(prop_chr_stat) +
  geom_bar(aes(x = Chrom_state, y = Prop), stat = "Identity") +
  labs(
    title = "TAD conserveness among different chromatin states",
    x = "chromatin state",
    y = "Proportion of conserved TAD boundaries"
  ) +
  scale_y_continuous(limits = c(0, 0.8)) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = .5),
    axis.title.x = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = 0, face = "bold"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = .5, face = "bold"
    ),
    # State names are tilted by 45 degrees on the x axis.
    axis.text.x = element_text(
      colour = "grey20", size = 13, angle = 45,
      hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = .5, face = "plain"
    ),
    # legend.position = c(0.8, 0.3),
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 12, face = "plain"),
    legend.background = element_rect(size = 0.5, linetype = "solid", colour = "black"),
    panel.grid = element_line(color = "grey85")
  )


## Shared or specific ----
# Input table; this section reads the columns Method, Types, Conserved, Total
# and Prop.
prop_ssp <- read.table("./shared_specific.txt", header = TRUE)

# Wide table of conserved counts: one row per Method, one column per Types
# value.
# NOTE(review): the positional rename assumes the two Types columns come out
# as shared first, specific second -- confirm against the input file.
cons_dat <- dcast(prop_ssp, Method ~ Types, value.var = "Conserved")
names(cons_dat)[2:3] <- c("cons_shared", "cons_specific")

# Same layout for the total counts.
tot_dat <- dcast(prop_ssp, Method ~ Types, value.var = "Total")
names(tot_dat)[2:3] <- c("tot_shared", "tot_specific")

# Join both tables on Method and label each row with its significance stars.
cons_tot <- merge(cons_dat, tot_dat, by = "Method")
cons_tot_withStar <- mutate(
  rowwise(cons_tot),
  star = chisq_func(cons_shared, cons_specific, tot_shared, tot_specific)
)

# Dodged bars of Prop per Method, coloured by Types, with the star label
# drawn at y = 0.7 for every Method.
ggplot(prop_ssp) +
  geom_bar(
    aes(x = Method, y = Prop, fill = Types),
    stat = "Identity",
    position = "dodge"
  ) +
  geom_text(
    data = cons_tot_withStar,
    aes(x = Method, y = 0.7, label = star),
    size = 7
  ) +
  scale_y_continuous(limits = c(0, 0.8)) +
  scale_fill_manual(values = c("#E69F00", "#999999")) +
  labs(
    title = "Conserveness for shared or specific TAD boundaries",
    x = "Method",
    y = "Proportion of conserved TAD boundaries"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = .5),
    axis.title.x = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = 0, face = "bold"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = .5, face = "bold"
    ),
    axis.text.x = element_text(
      colour = "grey20", size = 13, angle = 0,
      hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = .5, face = "plain"
    ),
    # legend.position = c(0.8, 0.3),
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 12, face = "plain"),
    legend.background = element_rect(size = 0.5, linetype = "solid", colour = "black"),
    panel.grid = element_line(color = "grey85")
  )


## Strong or weak ----
# Input table; this section reads the columns Compare, Cell_type, Types,
# Conserved, Total and Prop.
prop_sw <- read.table("./strong_weak.txt", header = TRUE)

# Wide table of conserved counts: one row per Compare / Cell_type, one column
# per Types value.
# NOTE(review): the positional rename assumes the two Types columns come out
# as strong first, weak second -- confirm against the input file.
cons_dat <- dcast(
  prop_sw,
  Compare + Cell_type ~ Types,
  value.var = "Conserved"
)
names(cons_dat)[3:4] <- c("cons_strong", "cons_weak")

# Same layout for the total counts.
tot_dat <- dcast(
  prop_sw,
  Compare + Cell_type ~ Types,
  value.var = "Total"
)
names(tot_dat)[3:4] <- c("tot_strong", "tot_weak")

# Join on the two identifying columns and label each row with its stars.
cons_tot <- merge(cons_dat, tot_dat, by = c("Compare", "Cell_type"))
cons_tot_withStar <- mutate(
  rowwise(cons_tot),
  star = chisq_func(cons_strong, cons_weak, tot_strong, tot_weak)
)

# Dodged bars of Prop per Cell_type, coloured by Types, one facet row per
# Compare value; the star label is drawn at y = 0.7 for every Cell_type.
ggplot(prop_sw) +
  geom_bar(
    aes(x = Cell_type, y = Prop, fill = Types),
    stat = "Identity",
    position = "dodge"
  ) +
  geom_text(
    data = cons_tot_withStar,
    aes(x = Cell_type, y = 0.7, label = star),
    size = 7
  ) +
  facet_grid(Compare ~ .) +
  scale_y_continuous(limits = c(0, 0.8)) +
  scale_fill_manual(values = c("#E69F00", "#999999")) +
  labs(
    title = "Conserveness for strong or weak TAD boundaries",
    x = "Cell types",
    y = "Proportion of conserved TAD boundaries"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = .5),
    axis.title.x = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = 0, face = "bold"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = .5, face = "bold"
    ),
    axis.text.x = element_text(
      colour = "grey20", size = 13, angle = 0,
      hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 13,
      hjust = .5, vjust = .5, face = "plain"
    ),
    # legend.position = c(0.8, 0.3),
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 12, face = "plain"),
    legend.background = element_rect(size = 0.5, linetype = "solid", colour = "black"),
    panel.grid = element_line(color = "grey85")
  )

