library("tximport")
library("readr")
library("tximportData")
library("dplyr")
library("DESeq2")
library("ComplexHeatmap")
library("ggplot2")
library("statmod")
library("pheatmap")
library("ggrepel")
library("tidyverse")
library("fgsea")



# ---- Inputs shared by every comparison below ----

# RSEM gene-level result files, one per sample. The vector is named by the
# file name with its ".genes.results" suffix removed; those names are matched
# against the metadata `id` column further down.
files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO",
  pattern = "genes.results$",
  full.names = TRUE
)
# Anchored, escaped pattern: strip only the literal trailing suffix.
names(files_to_read) <- sub("\\.genes\\.results$", "", basename(files_to_read))

# Repair-gene annotation: keep the Gene/Role columns, renamed to the
# gene_id/role names used by the joins below.
repair_gene <- read_tsv("/home/users/ayh/Projects/reference/repair_gene.edit.txt") %>%
  select(gene_id = Gene, role = Role)

# Tab-separated annotation table without a header row. Columns V10 and V14 are
# pasted into "<V10>_<V14>" ids that are compared with the RSEM row names.
# NOTE(review): presumably V10 is the Ensembl gene id and V14 the gene symbol
# (the later "ENSG[0-9]*_" prefix strip suggests so) -- confirm against the file.
gtf_df <- read.csv("/home/users/ayh/Projects/reference/RSEM/test.2.gtf",
                   header = FALSE,
                   sep = "\t")
gtf_ss_df <- gtf_df %>%
  select(V10, V14) %>%
  mutate(id = paste(V10, V14, sep = "_"))

# Sample sheet as a plain data.frame with the sample id as row name, so rows
# can be looked up by id.
metadata <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/metadata.v2.txt")
metadata <- as.data.frame(metadata)
rownames(metadata) <- metadata$id

#metadata<-metadata[,2:4]

###1. TP53KO_CTRL_3ug  ##########
##################################


# ---- Comparison 1: A3A_TP53KO samples, dose 3ug vs 100ng ----
cond <- "3ug"
sample1 <- "A3A_TP53KO"

# Keep samples of type `sample1` dosed with `cond` or 100ng, excluding the
# 48h/100ng samples. which() drops rows whose condition is NA instead of
# turning them into all-NA rows.
keep <- metadata$dose %in% c(cond, "100ng") &
  metadata$type == sample1 &
  !(metadata$time == "48h" & metadata$dose == "100ng")
vol_meta <- metadata[which(keep), , drop = FALSE]

# Every selected sample needs an RSEM file; fail early with a readable message
# rather than a dimension mismatch inside DESeq2.
missing_files <- setdiff(rownames(vol_meta), names(files_to_read))
if (length(missing_files) > 0) {
  stop("No genes.results file found for sample(s): ",
       paste(missing_files, collapse = ", "), call. = FALSE)
}
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
# Align colData to the file order so that row i of vol_meta describes column i
# of the imported matrices.
vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]

vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)

# For each imported matrix: keep only rows whose id is listed in gtf_ss_df$id,
# then strip the leading "ENSG<digits>_" part from the row names.
for (slot in c("abundance", "counts", "length")) {
  mat <- vol_txi.rsem[[slot]]
  mat <- mat[rownames(mat) %in% gtf_ss_df$id, , drop = FALSE]
  rownames(mat) <- gsub("ENSG[0-9]*_", "", rownames(mat))
  vol_txi.rsem[[slot]] <- mat
}

# Zero lengths are replaced by 1 before building the DESeq2 data set.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                    colData = vol_meta,
                                    design = ~dose)

vol_dds <- DESeq(vol_dds)

# ---- Highly variable genes: CV^2-vs-mean fit on size-factor-scaled counts ----
# NOTE(review): the structure (gamma GLM of CV^2 on 1/mean, chi-square test on
# the variance ratio) looks like the Brennecke et al. (2013) approach -- confirm.

# Scale every sample (column) by its size factor.
lib.size <- estimateSizeFactorsForMatrix(vol_txi.rsem$counts)
ed <- t(t(vol_txi.rsem$counts) / lib.size)

# Per-gene mean, variance and squared coefficient of variation.
means <- rowMeans(ed)
vars <- apply(ed, 1, var)
cv2 <- vars / means^2

smoothScatter(log(means), log(cv2))

# Fit cv2 ~ a0 + a1tilde / mean using only genes whose mean is at least the
# 95th percentile of the means of genes with cv2 > 0.3.
minMeanForFit <- unname(quantile(means[which(cv2 > .3)], .95))
useForFit <- means >= minMeanForFit # & spikeins
fit <- glmgam.fit(
  cbind(a0 = 1, a1tilde = 1 / means[useForFit]),
  cv2[useForFit]
)
a0 <- unname(fit$coefficients["a0"])
a1 <- unname(fit$coefficients["a1tilde"])
fit$coefficients

# Close the current device and redraw the scatter with the fitted trend.
dev.off()
par(mar = c(3.5, 3.5, 1, 1), mgp = c(2, 0.65, 0), cex = 0.9)
smoothScatter(log(means), log(cv2))
xg <- exp(seq(min(log(means[means > 0])), max(log(means)), length.out = 1000))
vfit <- a1 / xg + a0
# Fitted trend.
lines(log(xg), log(vfit), col = "black", lwd = 3)
df <- ncol(ed) - 1
# Chi-square based 2.5% / 97.5% band around the trend.
lines(log(xg), log(vfit * qchisq(0.975, df) / df), lty = 2, col = "black")
lines(log(xg), log(vfit * qchisq(0.025, df) / df), lty = 2, col = "black")

# Ratio of observed variance to the variance implied by the fit; genes are
# ranked by this ratio, largest first.
afit <- a1 / means + a0
varFitRatio <- vars / (afit * means^2)
varorder <- order(varFitRatio, decreasing = TRUE)
oed <- ed[varorder, , drop = FALSE]
# head() instead of 1:100 so a data set with fewer genes does not index past
# the end.
top_var_idx <- head(varorder, 100)
points(log(means[top_var_idx]), log(cv2[top_var_idx]), col = 2)

# Upper-tail chi-square p-value per gene, FDR-adjusted.
pval <- pchisq(varFitRatio * df, df = df, lower.tail = FALSE)
adj.pval <- p.adjust(pval, "fdr")
sigVariedGenes <- adj.pval < 0.05
table(sigVariedGenes)

sig_gene_df <- data.frame("gene_id" = names(adj.pval), "adj.pval" = adj.pval) %>%
  filter(adj.pval < 0.05) %>%
  arrange(adj.pval) %>%
  as_tibble()
sig_gene_top500_df <- sig_gene_df %>% head(500)
# Top rows of the ranked matrix (at most 500). The full-matrix console dumps
# of oed / pval / sigVariedGenes / m were dropped; inspect them interactively.
m <- head(oed, 500)
sig_gene_top500_df %>% print(n = 20)
write.table(sig_gene_top500_df, "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/top500_A3A_TP53KO.3ug.txt",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)


# ---- DESeq2 results table, volcano plot and export (A3A_TP53KO, 3ug) ----

# Extract the results once and attach the gene ids as a column.
vol_res <- results(vol_dds)
vol_res_df <- vol_res %>%
  as_tibble() %>%
  mutate(gene_id = rownames(vol_res))

# Significant DE genes (padj < 0.05) that are also in the highly-variable set,
# at most 500, ordered by padj.
top500_df <- vol_res_df %>%
  filter(padj < 0.05) %>%
  filter(gene_id %in% sig_gene_df$gene_id) %>%
  arrange(padj) %>%
  head(500)
# Same selection with an additional |log2FC| > 1 requirement.
a_top500_edit_df <- vol_res_df %>%
  filter(abs(log2FoldChange) > 1) %>%
  filter(padj < 0.05) %>%
  filter(gene_id %in% sig_gene_df$gene_id) %>%
  arrange(padj) %>%
  head(500)

# -log10(padj), capped at 300 (this also maps Inf to 300; NA stays NA).
a_vol_df <- vol_res_df %>%
  mutate(logp = pmin(-log10(padj), 300))

# Direction call: |log2FC| above 0.5849625 (about log2(1.5)) and logp > 2.
# NOTE(review): the plot's dashed guide lines are drawn at log2FC = +/-1, not at
# this threshold -- confirm which cut-off is intended.
a_vol_df <- a_vol_df %>%
  mutate(dir = ifelse(log2FoldChange > 0.5849625 & logp > 2, "increase",
                      ifelse(log2FoldChange < (-0.5849625) & logp > 2, "decrease", NA)))
a_vol_df <- left_join(a_vol_df, repair_gene, by = "gene_id")
a_vol_df %>% filter(gene_id == "POLH")
a_vol_TP53KO_df <- a_vol_df
# Repair-annotated genes with a direction call, sorted by role before printing
# (the sort used to come after print(), so the printed table was unsorted).
a_vol_df %>%
  filter(!is.na(role)) %>%
  filter(!is.na(dir)) %>%
  arrange(role) %>%
  print(n = 100)

# Volcano plot; only genes listed in repair_gene get a label.
a_vol3 <- ggplot(a_vol_df, aes(x = log2FoldChange, y = logp, col = dir)) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 40),
    legend.title = element_blank(),
    legend.text = element_text(size = 30),
    axis.ticks.y = element_line(size = 3),
    axis.ticks.x = element_line(size = 2),
    axis.ticks.length = unit(.25, "cm")
  ) +
  scale_x_continuous(limits = c(-10, 10)) +
  scale_colour_manual(values = c("#4db7f7", "#c54242"), limits = c("decrease", "increase")) +
  geom_label_repel(
    aes(label = ifelse(gene_id %in% repair_gene$gene_id, gene_id, "")),
    max.overlaps = 1000000, min.segment.length = 0.1, vjust = 1,
    show.legend = FALSE,
    segment.color = "grey50",
    size = 5
  ) +
  scale_y_continuous(limits = c(0, 350)) +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  ylab(expression(-log[10] * "(adj.Pval)")) +
  xlab(expression(log[2] * "FoldChange"))

# Explicit print(): a bare object is only drawn by top-level auto-printing, so
# the PDF would be empty when this script is run through source().
pdf("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/volcano.A3A_TP53KO.3ug.pdf", width = 15, height = 10)
print(a_vol3)
dev.off()

a_vol_df %>%
  write.table("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/A3A_TP53KO.3ug.DEseq2.txt",
              sep = "\t",
              quote = FALSE,
              row.names = FALSE)


###WTvsKO###



# ---- Comparison 2: A3A vs A3A_TP53KO at dose 100ng, time 0h ----
cond <- "100ng"
sample1 <- "A3A"
sample2 <- "A3A_TP53KO"

# Keep 0h samples at dose `cond` belonging to either type. which() drops rows
# whose condition is NA instead of turning them into all-NA rows.
keep <- metadata$dose == cond &
  metadata$time == "0h" &
  metadata$type %in% c(sample1, sample2)
vol_meta <- metadata[which(keep), , drop = FALSE]

# Every selected sample needs an RSEM file; fail early with a readable message
# rather than a dimension mismatch inside DESeq2.
missing_files <- setdiff(rownames(vol_meta), names(files_to_read))
if (length(missing_files) > 0) {
  stop("No genes.results file found for sample(s): ",
       paste(missing_files, collapse = ", "), call. = FALSE)
}
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
# Align colData to the file order so that row i of vol_meta describes column i
# of the imported matrices.
vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]

vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)

# For each imported matrix: keep only rows whose id is listed in gtf_ss_df$id,
# then strip the leading "ENSG<digits>_" part from the row names.
for (slot in c("abundance", "counts", "length")) {
  mat <- vol_txi.rsem[[slot]]
  mat <- mat[rownames(mat) %in% gtf_ss_df$id, , drop = FALSE]
  rownames(mat) <- gsub("ENSG[0-9]*_", "", rownames(mat))
  vol_txi.rsem[[slot]] <- mat
}

# Zero lengths are replaced by 1 before building the DESeq2 data set.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                    colData = vol_meta,
                                    design = ~type)

vol_dds <- DESeq(vol_dds)

# ---- Highly variable genes: CV^2-vs-mean fit on size-factor-scaled counts ----
# NOTE(review): the structure (gamma GLM of CV^2 on 1/mean, chi-square test on
# the variance ratio) looks like the Brennecke et al. (2013) approach -- confirm.

# Scale every sample (column) by its size factor.
lib.size <- estimateSizeFactorsForMatrix(vol_txi.rsem$counts)
ed <- t(t(vol_txi.rsem$counts) / lib.size)

# Per-gene mean, variance and squared coefficient of variation.
means <- rowMeans(ed)
vars <- apply(ed, 1, var)
cv2 <- vars / means^2

smoothScatter(log(means), log(cv2))

# Fit cv2 ~ a0 + a1tilde / mean using only genes whose mean is at least the
# 95th percentile of the means of genes with cv2 > 0.3.
minMeanForFit <- unname(quantile(means[which(cv2 > .3)], .95))
useForFit <- means >= minMeanForFit # & spikeins
fit <- glmgam.fit(
  cbind(a0 = 1, a1tilde = 1 / means[useForFit]),
  cv2[useForFit]
)
a0 <- unname(fit$coefficients["a0"])
a1 <- unname(fit$coefficients["a1tilde"])
fit$coefficients

# Close the current device and redraw the scatter with the fitted trend.
dev.off()
par(mar = c(3.5, 3.5, 1, 1), mgp = c(2, 0.65, 0), cex = 0.9)
smoothScatter(log(means), log(cv2))
xg <- exp(seq(min(log(means[means > 0])), max(log(means)), length.out = 1000))
vfit <- a1 / xg + a0
# Fitted trend.
lines(log(xg), log(vfit), col = "black", lwd = 3)
df <- ncol(ed) - 1
# Chi-square based 2.5% / 97.5% band around the trend.
lines(log(xg), log(vfit * qchisq(0.975, df) / df), lty = 2, col = "black")
lines(log(xg), log(vfit * qchisq(0.025, df) / df), lty = 2, col = "black")

# Ratio of observed variance to the variance implied by the fit; genes are
# ranked by this ratio, largest first.
afit <- a1 / means + a0
varFitRatio <- vars / (afit * means^2)
varorder <- order(varFitRatio, decreasing = TRUE)
oed <- ed[varorder, , drop = FALSE]
# head() instead of 1:100 so a data set with fewer genes does not index past
# the end.
top_var_idx <- head(varorder, 100)
points(log(means[top_var_idx]), log(cv2[top_var_idx]), col = 2)

# Upper-tail chi-square p-value per gene, FDR-adjusted.
pval <- pchisq(varFitRatio * df, df = df, lower.tail = FALSE)
adj.pval <- p.adjust(pval, "fdr")
sigVariedGenes <- adj.pval < 0.05
table(sigVariedGenes)

sig_gene_df <- data.frame("gene_id" = names(adj.pval), "adj.pval" = adj.pval) %>%
  filter(adj.pval < 0.05) %>%
  arrange(adj.pval) %>%
  as_tibble()
sig_gene_top500_df <- sig_gene_df %>% head(500)
# Top rows of the ranked matrix (at most 500). The full-matrix console dumps
# of oed / pval / sigVariedGenes / m were dropped; inspect them interactively.
m <- head(oed, 500)
sig_gene_top500_df %>% print(n = 20)
write.table(sig_gene_top500_df, "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/top500_A3A_WT_KO.CTRL.txt",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)


# ---- DESeq2 results table, volcano plot and export (A3A vs A3A_TP53KO) ----

# Extract the results once and attach the gene ids as a column.
vol_res <- results(vol_dds)
vol_res_df <- vol_res %>%
  as_tibble() %>%
  mutate(gene_id = rownames(vol_res))

# Significant DE genes (padj < 0.05) that are also in the highly-variable set,
# at most 500, ordered by padj.
top500_df <- vol_res_df %>%
  filter(padj < 0.05) %>%
  filter(gene_id %in% sig_gene_df$gene_id) %>%
  arrange(padj) %>%
  head(500)
# Same selection with an additional |log2FC| > 1 requirement.
a_top500_edit_df <- vol_res_df %>%
  filter(abs(log2FoldChange) > 1) %>%
  filter(padj < 0.05) %>%
  filter(gene_id %in% sig_gene_df$gene_id) %>%
  arrange(padj) %>%
  head(500)

# -log10(padj), capped at 300 (this also maps Inf to 300; NA stays NA).
a_vol_df <- vol_res_df %>%
  mutate(logp = pmin(-log10(padj), 300))

# Direction call: |log2FC| above 0.5849625 (about log2(1.5)) and logp > 2.
# NOTE(review): the plot's dashed guide lines are drawn at log2FC = +/-1, not at
# this threshold -- confirm which cut-off is intended.
a_vol_df <- a_vol_df %>%
  mutate(dir = ifelse(log2FoldChange > 0.5849625 & logp > 2, "increase",
                      ifelse(log2FoldChange < (-0.5849625) & logp > 2, "decrease", NA)))
a_vol_df <- left_join(a_vol_df, repair_gene, by = "gene_id")
a_vol_df %>% filter(gene_id == "POLH")
# Repair-annotated genes with a direction call, sorted by role.
a_vol_df %>%
  filter(!is.na(role)) %>%
  filter(!is.na(dir)) %>%
  arrange(role) %>%
  print(n = 100)

a_vol_WT_KO_df <- a_vol_df

# Volcano plot; only genes listed in repair_gene get a label.
a_vol2 <- ggplot(a_vol_df, aes(x = log2FoldChange, y = logp, col = dir)) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 40),
    legend.title = element_blank(),
    legend.text = element_text(size = 30),
    axis.ticks.y = element_line(size = 3),
    axis.ticks.x = element_line(size = 2),
    axis.ticks.length = unit(.25, "cm")
  ) +
  scale_x_continuous(limits = c(-10, 10)) +
  scale_colour_manual(values = c("#4db7f7", "#c54242"), limits = c("decrease", "increase")) +
  geom_label_repel(
    aes(label = ifelse(gene_id %in% repair_gene$gene_id, gene_id, "")),
    max.overlaps = 1000000, min.segment.length = 0.1, vjust = 1,
    show.legend = FALSE,
    segment.color = "grey50",
    size = 5
  ) +
  scale_y_continuous(limits = c(0, 350)) +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  ylab(expression(-log[10] * "(adj.Pval)")) +
  xlab(expression(log[2] * "FoldChange"))

# On-screen preview (drawn by top-level auto-printing).
a_vol2
# Explicit print(): a bare object is only drawn by top-level auto-printing, so
# the PDF would be empty when this script is run through source().
pdf("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/volcano.A3A_WT_KO.pdf", width = 15, height = 10)
print(a_vol2)
dev.off()

a_vol_df %>%
  write.table("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/A3A_WT_KO.DEseq2.txt",
              sep = "\t",
              quote = FALSE,
              row.names = FALSE)

###A3A CTRL vs 3ug###



# ---- Comparison 3: A3A "C3" samples, time 48h vs 0h ----
# NOTE(review): the section header says "CTRL vs 3ug"; with the 48h/100ng
# samples excluded the 48h group is presumably the 3ug-treated one -- confirm.
sample1 <- "A3A"
fac1 <- "0h"
fac2 <- "48h"

# Keep samples of type `sample1` at either time point whose id contains "C3",
# excluding the 48h/100ng samples and the sample "A3A_1st_C3_48h_3ug".
# which() drops rows whose condition is NA instead of producing all-NA rows.
keep <- metadata$type == sample1 &
  metadata$time %in% c(fac1, fac2) &
  grepl("C3", metadata$id) &
  !(metadata$time == "48h" & metadata$dose == "100ng") &
  metadata$id != "A3A_1st_C3_48h_3ug"
vol_meta <- metadata[which(keep), , drop = FALSE]

# Every selected sample needs an RSEM file; fail early with a readable message
# rather than a dimension mismatch inside DESeq2.
missing_files <- setdiff(rownames(vol_meta), names(files_to_read))
if (length(missing_files) > 0) {
  stop("No genes.results file found for sample(s): ",
       paste(missing_files, collapse = ", "), call. = FALSE)
}
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
# Align colData to the file order so that row i of vol_meta describes column i
# of the imported matrices. This replaces the former arrange(time) step, which
# only ordered the rows and did not tie them to the files by sample id.
vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]

vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)

# For each imported matrix: keep only rows whose id is listed in gtf_ss_df$id,
# then strip the leading "ENSG<digits>_" part from the row names.
for (slot in c("abundance", "counts", "length")) {
  mat <- vol_txi.rsem[[slot]]
  mat <- mat[rownames(mat) %in% gtf_ss_df$id, , drop = FALSE]
  rownames(mat) <- gsub("ENSG[0-9]*_", "", rownames(mat))
  vol_txi.rsem[[slot]] <- mat
}

# Zero lengths are replaced by 1 before building the DESeq2 data set.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

# Show the sample sheet next to the imported column names for a visual check.
vol_meta
colnames(vol_txi.rsem$length)

vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                    colData = vol_meta,
                                    design = ~time)

vol_dds <- DESeq(vol_dds)

# ---- Highly variable genes: CV^2-vs-mean fit on size-factor-scaled counts ----
# NOTE(review): the structure (gamma GLM of CV^2 on 1/mean, chi-square test on
# the variance ratio) looks like the Brennecke et al. (2013) approach -- confirm.

# Scale every sample (column) by its size factor.
lib.size <- estimateSizeFactorsForMatrix(vol_txi.rsem$counts)
ed <- t(t(vol_txi.rsem$counts) / lib.size)

# Per-gene mean, variance and squared coefficient of variation.
means <- rowMeans(ed)
vars <- apply(ed, 1, var)
cv2 <- vars / means^2

smoothScatter(log(means), log(cv2))

# Fit cv2 ~ a0 + a1tilde / mean using only genes whose mean is at least the
# 95th percentile of the means of genes with cv2 > 0.3.
minMeanForFit <- unname(quantile(means[which(cv2 > .3)], .95))
useForFit <- means >= minMeanForFit # & spikeins
fit <- glmgam.fit(
  cbind(a0 = 1, a1tilde = 1 / means[useForFit]),
  cv2[useForFit]
)
a0 <- unname(fit$coefficients["a0"])
a1 <- unname(fit$coefficients["a1tilde"])
fit$coefficients

# Close the current device and redraw the scatter with the fitted trend.
dev.off()
par(mar = c(3.5, 3.5, 1, 1), mgp = c(2, 0.65, 0), cex = 0.9)
smoothScatter(log(means), log(cv2))
xg <- exp(seq(min(log(means[means > 0])), max(log(means)), length.out = 1000))
vfit <- a1 / xg + a0
# Fitted trend.
lines(log(xg), log(vfit), col = "black", lwd = 3)
df <- ncol(ed) - 1
# Chi-square based 2.5% / 97.5% band around the trend.
lines(log(xg), log(vfit * qchisq(0.975, df) / df), lty = 2, col = "black")
lines(log(xg), log(vfit * qchisq(0.025, df) / df), lty = 2, col = "black")

# Ratio of observed variance to the variance implied by the fit; genes are
# ranked by this ratio, largest first.
afit <- a1 / means + a0
varFitRatio <- vars / (afit * means^2)
varorder <- order(varFitRatio, decreasing = TRUE)
oed <- ed[varorder, , drop = FALSE]
# head() instead of 1:100 so a data set with fewer genes does not index past
# the end.
top_var_idx <- head(varorder, 100)
points(log(means[top_var_idx]), log(cv2[top_var_idx]), col = 2)

# Upper-tail chi-square p-value per gene, FDR-adjusted.
pval <- pchisq(varFitRatio * df, df = df, lower.tail = FALSE)
adj.pval <- p.adjust(pval, "fdr")
sigVariedGenes <- adj.pval < 0.05
table(sigVariedGenes)

sig_gene_df <- data.frame("gene_id" = names(adj.pval), "adj.pval" = adj.pval) %>%
  filter(adj.pval < 0.05) %>%
  arrange(adj.pval) %>%
  as_tibble()
sig_gene_top500_df <- sig_gene_df %>% head(500)
# Top rows of the ranked matrix (at most 500). The full-matrix console dumps
# of oed / pval / sigVariedGenes / m were dropped; inspect them interactively.
m <- head(oed, 500)
sig_gene_top500_df %>% print(n = 20)
write.table(sig_gene_top500_df, "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/top500_A3A.3ug.txt",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)


# ---- DESeq2 results table, volcano plot and export (A3A, 48h vs 0h) ----

# Extract the results once and attach the gene ids as a column.
vol_res <- results(vol_dds)
vol_res_df <- vol_res %>%
  as_tibble() %>%
  mutate(gene_id = rownames(vol_res))

# Significant DE genes (padj < 0.05) that are also in the highly-variable set,
# at most 500, ordered by padj.
top500_df <- vol_res_df %>%
  filter(padj < 0.05) %>%
  filter(gene_id %in% sig_gene_df$gene_id) %>%
  arrange(padj) %>%
  head(500)
# Same selection with an additional |log2FC| > 1 requirement.
a_top500_edit_df <- vol_res_df %>%
  filter(abs(log2FoldChange) > 1) %>%
  filter(padj < 0.05) %>%
  filter(gene_id %in% sig_gene_df$gene_id) %>%
  arrange(padj) %>%
  head(500)

# -log10(padj), capped at 300 (this also maps Inf to 300; NA stays NA).
a_vol_df <- vol_res_df %>%
  mutate(logp = pmin(-log10(padj), 300))

# Direction call: |log2FC| above 0.5849625 (about log2(1.5)) and logp > 2.
# NOTE(review): the plot's dashed guide lines are drawn at log2FC = +/-1, not at
# this threshold -- confirm which cut-off is intended.
a_vol_df <- a_vol_df %>%
  mutate(dir = ifelse(log2FoldChange > 0.5849625 & logp > 2, "increase",
                      ifelse(log2FoldChange < (-0.5849625) & logp > 2, "decrease", NA)))
a_vol_df <- left_join(a_vol_df, repair_gene, by = "gene_id")
a_vol_df %>% filter(gene_id == "POLH")
# Repair-annotated genes with a direction call, sorted by role.
a_vol_df %>%
  filter(!is.na(role)) %>%
  filter(!is.na(dir)) %>%
  arrange(role) %>%
  print(n = 100)

a_vol_WT_3ug_df <- a_vol_df

# Volcano plot; only genes listed in repair_gene get a label.
a_vol4 <- ggplot(a_vol_df, aes(x = log2FoldChange, y = logp, col = dir)) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 40),
    legend.title = element_blank(),
    legend.text = element_text(size = 30),
    axis.ticks.y = element_line(size = 3),
    axis.ticks.x = element_line(size = 2),
    axis.ticks.length = unit(.25, "cm")
  ) +
  scale_x_continuous(limits = c(-10, 10)) +
  scale_colour_manual(values = c("#4db7f7", "#c54242"), limits = c("decrease", "increase")) +
  geom_label_repel(
    # Match against the gene_id column: `%in% repair_gene` compared the ids
    # with the data frame itself, so no gene was ever labelled in this plot.
    aes(label = ifelse(gene_id %in% repair_gene$gene_id, gene_id, "")),
    max.overlaps = 1000000, min.segment.length = 0.1, vjust = 1,
    show.legend = FALSE,
    segment.color = "grey50",
    size = 5
  ) +
  scale_y_continuous(limits = c(0, 350)) +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  ylab(expression(-log[10] * "(adj.Pval)")) +
  xlab(expression(log[2] * "FoldChange"))

# Explicit print(): a bare object is only drawn by top-level auto-printing, so
# the PDF would be empty when this script is run through source().
pdf("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/volcano.A3A.3ug.pdf", width = 15, height = 10)
print(a_vol4)
dev.off()

a_vol_df %>%
  write.table("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/A3A.3ug.DEseq2.txt",
              sep = "\t",
              quote = FALSE,
              row.names = FALSE)

#####
# ---- Re-load the three exported DESeq2 tables and compare repair genes ----
library(dplyr)
library(tidyverse)

# `dir` and `role` are NA for most rows, so type guessing can infer a logical
# column and turn the text labels into NA. Pin the text columns explicitly;
# all other columns are still guessed.
deseq_col_types <- cols(
  gene_id = col_character(),
  dir = col_character(),
  role = col_character()
)
a_vol_WT_3ug_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/A3A.3ug.DEseq2.txt",
                            col_types = deseq_col_types)
a_vol_WT_KO_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/A3A_WT_KO.DEseq2.txt",
                           col_types = deseq_col_types)
a_vol_TP53KO_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/TP53KO/A3A_TP53KO.3ug.DEseq2.txt",
                            col_types = deseq_col_types)

# Repair genes called "increase" in the A3A (WT) table ...
WT_inc_gene <- a_vol_WT_3ug_df %>%
  filter(!is.na(role)) %>%
  filter(dir == "increase")
# ... that are called "decrease" in the WT-vs-KO table,
a_vol_WT_KO_df %>%
  filter(!is.na(role)) %>%
  filter(gene_id %in% WT_inc_gene$gene_id) %>%
  filter(dir == "decrease")
# ... and that are "decrease" or uncalled in the TP53KO table.
a_vol_TP53KO_df %>%
  filter(!is.na(role)) %>%
  filter(gene_id %in% WT_inc_gene$gene_id) %>%
  filter(dir == "decrease" | is.na(dir))

# Mirror image: repair genes called "decrease" in the A3A (WT) table that are
# "increase" or uncalled in each of the other two tables.
WT_dec_gene <- a_vol_WT_3ug_df %>%
  filter(!is.na(role)) %>%
  filter(dir == "decrease")
a_vol_WT_KO_df %>%
  filter(!is.na(role)) %>%
  filter(gene_id %in% WT_dec_gene$gene_id) %>%
  filter(dir == "increase" | is.na(dir)) %>%
  print(n = 100)
a_vol_TP53KO_df %>%
  filter(!is.na(role)) %>%
  filter(gene_id %in% WT_dec_gene$gene_id) %>%
  filter(dir == "increase" | is.na(dir)) %>%
  print(n = 100)


# Private library holding a pinned ggplot2 3.4.0.
pinned_lib <- path.expand("~/R-lib")
dir.create(pinned_lib, showWarnings = FALSE)

# Install only when the pinned version is not already present, so the script
# does not hit the network and reinstall on every run.
pinned_ready <- file.exists(file.path(pinned_lib, "ggplot2", "DESCRIPTION")) &&
  packageVersion("ggplot2", lib.loc = pinned_lib) == "3.4.0"
if (!pinned_ready) {
  devtools::install_version("ggplot2", version = "3.4.0", lib = pinned_lib)
}

# ggplot2 is already attached at the top of this script, and R cannot load two
# versions of a package in one session, so the library() call below does not
# switch versions in that case. Say so instead of failing silently.
if ("ggplot2" %in% loadedNamespaces() &&
    as.character(getNamespaceVersion("ggplot2")) != "3.4.0") {
  warning("ggplot2 ", as.character(getNamespaceVersion("ggplot2")),
          " is already loaded; the pinned 3.4.0 in ", pinned_lib,
          " is not used in this session. Restart R and load it first.",
          call. = FALSE)
}

# Load it in your script manually (note: still cannot have two versions loaded!)
library(ggplot2, lib.loc = pinned_lib)
library(ggVennDiagram)


# ---- Venn diagram: repair genes called "increase" in the A3A (WT) table ----

# Repair-annotated gene ids per table:
#   WT             - called "increase" in the A3A table
#   WT_KO_decrease - called "decrease" or uncalled in the WT-vs-KO table
#   KO_NA          - uncalled in the TP53KO table
WT <- a_vol_WT_3ug_df %>%
  filter(!is.na(role), dir == "increase") %>%
  pull(gene_id)
WT_KO_decrease <- a_vol_WT_KO_df %>%
  filter(!is.na(role), dir == "decrease" | is.na(dir)) %>%
  pull(gene_id)
KO_NA <- a_vol_TP53KO_df %>%
  filter(!is.na(role), is.na(dir)) %>%
  pull(gene_id)

gene_lists <- list(
  WT = WT,
  WT_KO_decrease = WT_KO_decrease,
  KO_NA = KO_NA
)

# Console check: WT-increased repair genes that are "decrease" in WT-vs-KO.
a_vol_WT_KO_df %>%
  filter(!is.na(role), gene_id %in% WT_inc_gene$gene_id, dir == "decrease")

p_ven <- ggVennDiagram(gene_lists, label_alpha = 0, edge_size = 0.5) +
  ggplot2::scale_fill_gradient(low = "white", high = "skyblue")
p_ven
ggsave("/home/users/ayh/Projects/27_A3B/07_revision/Repair_gene.pdf", p_ven,
       height = 8, width = 10)

venn_obj <- Venn(gene_lists)

# Members of selected Venn regions.
wt_and_wtko <- intersect(WT, WT_KO_decrease)
intersect(wt_and_wtko, KO_NA)
setdiff(wt_and_wtko, KO_NA)
setdiff(intersect(WT, KO_NA), WT_KO_decrease)
#setdiff(intersect(WT_KO_decrease,KO_NA),WT)



# ---- Venn diagram: repair genes called "decrease" in the A3A (WT) table ----

# Repair-annotated gene ids per table:
#   WT2             - called "decrease" in the A3A table
#   WT_KO_increase2 - called "increase" or uncalled in the WT-vs-KO table
#   KO_NA2          - uncalled in the TP53KO table
WT2 <- a_vol_WT_3ug_df %>%
  filter(!is.na(role)) %>%
  filter(dir == "decrease") %>%
  pull(gene_id)
WT_KO_increase2 <- a_vol_WT_KO_df %>%
  filter(!is.na(role)) %>%
  filter(dir == "increase" | is.na(dir)) %>%
  pull(gene_id)
KO_NA2 <- a_vol_TP53KO_df %>%
  filter(!is.na(role)) %>%
  filter(is.na(dir)) %>%
  pull(gene_id)

gene_lists <- list(
  WT = WT2,
  WT_KO_increase = WT_KO_increase2,
  KO_NA = KO_NA2
)

# Console check for this direction: WT-decreased repair genes that are
# "increase" in WT-vs-KO. The previous version repeated the "increase" check
# (WT_inc_gene / "decrease") verbatim.
# NOTE(review): confirm this mirrored check is the one intended.
a_vol_WT_KO_df %>%
  filter(!is.na(role)) %>%
  filter(gene_id %in% WT_dec_gene$gene_id) %>%
  filter(dir == "increase")

p_ven2 <- ggVennDiagram(gene_lists, label_alpha = 0, edge_size = 0.5) +
  ggplot2::scale_fill_gradient(low = "white", high = "skyblue")
p_ven2
# Saved under its own file name and with the plot passed explicitly: the old
# call wrote to Repair_gene.pdf, overwriting the "increase" Venn diagram saved
# earlier, and relied on the last displayed plot.
ggsave("/home/users/ayh/Projects/27_A3B/07_revision/Repair_gene.WT_decrease.pdf", p_ven2,
       height = 8, width = 10)
venn_obj <- Venn(gene_lists)

# Members of selected Venn regions.
intersect(intersect(WT2, WT_KO_increase2), KO_NA2)
setdiff(intersect(WT2, WT_KO_increase2), KO_NA2)
setdiff(intersect(WT2, KO_NA2), WT_KO_increase2)
#setdiff(intersect(WT_KO_decrease,KO_NA),WT)
