library("tximport")
library("readr")
library("tximportData")
library("dplyr")
library("DESeq2")
library("ComplexHeatmap")
library("ggplot2")
library("statmod")
library("pheatmap")
#library("EnhancedVolcano")
library("ggrepel")
library("tidyverse")
library("fgsea")
# ---- Shared inputs ----------------------------------------------------------
# A stray `(statmod)` expression used to sit here. It evaluates a variable
# called `statmod`, which is never created (the package is only attached via
# library()), so it stopped a top-to-bottom run with "object not found".

# RSEM gene-level result files; each entry is named after its file name with
# the ".genes.results" suffix removed so it can be matched to metadata ids.
files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A",
  pattern = "genes.results$",
  full.names = TRUE
)
files_to_read

names(files_to_read) <- gsub(".genes.results", "", basename(files_to_read))
names(files_to_read)
#files_to_read <- list.files(path= "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/02_rsem/rsem/",pattern="genes.results$",full.names=T)
#files_to_read<-files_to_read[1:8]

# Tab-separated table read without a header; columns V10 and V14 are pasted
# into "<V10>_<V14>" ids, which are later matched against the row names of
# the tximport matrices.
# NOTE(review): V10/V14 are presumably gene id and gene name - confirm against
# the layout of test.2.gtf.
gtf_df <- read.csv(
  "/home/users/ayh/Projects/reference/RSEM/test.2.gtf",
  header = FALSE,
  sep = "\t"
)
# dplyr:: is spelled out so the call cannot be hijacked by another attached
# package that also exports select().
gtf_ss_df <- gtf_df %>%
  dplyr::select(V10, V14) %>%
  mutate(id = paste(V10, V14, sep = "_"))

# Sample sheet; row names are set to the `id` column so that rows can be
# matched to names(files_to_read).
metadata <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/metadata.v2.txt")
metadata <- as.data.frame(metadata)
rownames(metadata) <- metadata$id
metadata
#metadata<-metadata[,2:4]

### 1. A3A: top 500 genes ##########
##################################


# ---- A3A: assemble the DESeq2 data set for the 48h-vs-0h contrast ----------
# Contrast settings.
i <- "48h"

cond <- "3ug"
#sample <- "A3B"
sample <- "A3A"
fac1 <- "0h"
fac2 <- i

# Narrow the sample sheet step by step: chosen dose or time 0h, chosen type,
# the two time points of the contrast, and finally ids containing "C3" with
# one sample id explicitly left out.
vol_meta <- metadata[metadata$dose == cond | metadata$time == "0h", ]
vol_meta <- vol_meta[vol_meta$type == sample, ]
vol_meta <- vol_meta[vol_meta$time == fac2 | vol_meta$time == fac1, ]
vol_meta <- vol_meta[
  grepl("C3", vol_meta$id) & vol_meta$id != "A3A_1st_C3_48h_3ug",
]

# Import only the result files whose names appear in the filtered sheet.
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
vol_files_to_read
vol_txi.rsem <- tximport(
  vol_files_to_read,
  type = "rsem",
  txIn = FALSE,
  txOut = FALSE
)

# Keep, in each of the three matrices, only rows whose name is a known
# "<V10>_<V14>" id from the GTF table.
vol_txi.rsem[c("abundance", "counts", "length")] <- lapply(
  vol_txi.rsem[c("abundance", "counts", "length")],
  function(mat) mat[rownames(mat) %in% gtf_ss_df$id, ]
)
rownames(vol_txi.rsem$abundance)

# Zero lengths are replaced by 1 before the matrices are handed to DESeq2.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

# Strip the leading "ENSG<digits>_" from every row name.
vol_txi.rsem[c("abundance", "counts", "length")] <- lapply(
  vol_txi.rsem[c("abundance", "counts", "length")],
  function(mat) {
    rownames(mat) <- gsub("ENSG[0-9]*_", "", rownames(mat))
    mat
  }
)

# Fit the model with `time` as the only design term.
vol_dds <- DESeqDataSetFromTximport(
  vol_txi.rsem,
  colData = vol_meta,
  design = ~time
)
vol_dds <- DESeq(vol_dds)
#vol_res<-results(vol_dds)


# ---- A3A: mean / CV^2 fit and over-dispersed gene list ----------------------
# Divide every column of the count matrix by its size factor.
lib.size <- estimateSizeFactorsForMatrix(vol_txi.rsem$counts)
ed <- sweep(vol_txi.rsem$counts, 2, lib.size, "/")

# Per-gene mean, variance and squared coefficient of variation.
means <- rowMeans(ed)
vars <- apply(ed, 1, var)
cv2 <- vars / means^2

smoothScatter(log(means), log(cv2))

# Fit cv2 ~ a0 + a1tilde / mean with a gamma GLM, using only genes whose mean
# is at least the 95th percentile of the means of genes with cv2 > 0.3.
minMeanForFit <- unname(quantile(means[which(cv2 > .3)], .95))
useForFit <- means >= minMeanForFit # & spikeins
fit <- glmgam.fit(
  cbind(a0 = 1, a1tilde = 1 / means[useForFit]),
  cv2[useForFit]
)
a0 <- fit$coefficients[["a0"]]
a1 <- fit$coefficients[["a1tilde"]]
fit$coefficients

# Close the current device and redraw the scatter with tighter margins.
dev.off()
par(mar = c(3.5, 3.5, 1, 1), mgp = c(2, 0.65, 0), cex = 0.9)
smoothScatter(log(means), log(cv2))

# Fitted curve on a log-spaced grid of means.
xg <- exp(seq(min(log(means[means > 0])), max(log(means)), length.out = 1000))
vfit <- a1 / xg + a0
lines(log(xg), log(vfit), col = "black", lwd = 3)

# Dashed band from the 2.5% / 97.5% chi-square quantiles.
df <- ncol(ed) - 1
lines(log(xg), log(vfit * qchisq(0.975, df) / df), lty = 2, col = "black")
lines(log(xg), log(vfit * qchisq(0.025, df) / df), lty = 2, col = "black")

# Ratio of observed variance to the fitted variance, genes sorted by it.
afit <- a1 / means + a0
varFitRatio <- vars / (afit * means^2)
varorder <- order(varFitRatio, decreasing = TRUE)
oed <- ed[varorder, ]
oed
# Highlight the 100 genes with the largest ratio.
points(
  log(means[varorder[1:100]]),
  log(cv2[varorder[1:100]]),
  col = 2
)

# Chi-square upper-tail p-value per gene, FDR-adjusted.
pval <- pchisq(varFitRatio * df, df = df, lower.tail = FALSE)
adj.pval <- p.adjust(pval, "fdr")
sigVariedGenes <- adj.pval < 0.05
table(sigVariedGenes)
pval

# Genes with adjusted p < 0.05, most significant first; first 500 kept.
sig_gene_df <- data.frame(
  "gene_id" = names(adj.pval),
  "adj.pval" = adj.pval
) %>%
  filter(adj.pval < 0.05) %>%
  arrange(adj.pval) %>%
  as.tibble()
sig_gene_top500_df <- head(sig_gene_df, 500)
table(sigVariedGenes)
sigVariedGenes
oed

# First 500 rows of the ratio-ordered normalised matrix.
m <- oed[1:500, ]
m
as.data.frame(m)

write.table(
  sig_gene_top500_df,
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/top500_A3A.3ug.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)


# ---- A3A: DESeq2 result tables and gene lists for the volcano plots ---------
# One tidy copy of the DESeq2 results. as_tibble() drops row names, so the
# gene names are re-attached as a trailing `gene_id` column. (as_tibble()
# replaces the deprecated as.tibble().)
a_res_df <- results(vol_dds) %>%
  as_tibble() %>%
  mutate(gene_id = rownames(results(vol_dds)))

# Genes with padj < 0.05 that are also in the over-dispersed list
# (sig_gene_df), best padj first, at most 500 rows.
top500_df <- a_res_df %>%
  filter(padj < 0.05, gene_id %in% sig_gene_df$gene_id) %>%
  arrange(padj) %>%
  head(500)

# Same, additionally requiring |log2FoldChange| > 1.
a_top500_edit_df <- a_res_df %>%
  filter(
    abs(log2FoldChange) > 1,
    padj < 0.05,
    gene_id %in% sig_gene_df$gene_id
  ) %>%
  arrange(padj) %>%
  head(500)

# Volcano table: -log10(padj), capped at 300 (this also maps Inf to 300), and
# a direction flag for points beyond |log2FC| > 1 with logp > 2.
a_vol_df <- a_res_df %>%
  mutate(logp = pmin(-log10(padj), 300))
a_vol_df %>% filter(log2FoldChange < (-1))
a_vol_df <- a_vol_df %>%
  mutate(dir = ifelse(
    log2FoldChange > 1 & logp > 2, "increase",
    ifelse(log2FoldChange < (-1) & logp > 2, "decrease", NA)
  ))
a_vol_df %>% arrange(-log2FoldChange)

# Join the driver-gene table; its `gene` column is renamed to `gene_id` so it
# can serve as the join key.
driver_gene_df <- read_tsv("/home/users/ayh/Projects/TCGA_cancer_census_gene.txt")
driver_gene_df <- driver_gene_df %>% plyr::rename(c("gene" = "gene_id"))
a_vol_df <- left_join(a_vol_df, driver_gene_df)
a_vol_df %>%
  filter(dir %in% c("increase", "decrease")) %>%
  filter(!role %in% c("fusion")) %>%
  filter(!is.na(role)) %>%
  arrange(padj)

# The A3B table (b_vol_df) is only created further down in this file, so on a
# clean top-to-bottom run it does not exist yet. The original unguarded calls
# failed here; they now run only when the object is already in the session.
if (exists("b_vol_df")) {
  b_vol_df <- left_join(b_vol_df, driver_gene_df)
  b_vol_df %>%
    filter(dir %in% c("increase", "decrease")) %>%
    filter(!role %in% c("fusion")) %>%
    filter(!is.na(role)) %>%
    arrange(padj) %>%
    print()
  b_vol_df %>%
    filter(dir %in% c("increase", "decrease")) %>%
    filter(
      role %in% c("TSG", "oncogene") |
        (grepl("oncogene", role) & !grepl("TSG", role))
    ) %>%
    print(n = 100)
}

# Gene lists used to label the volcano plots. Earlier values of a_anno_gene
# and atf_gene that were overwritten before any use have been dropped; only
# the assignments that were in effect at the end of this block remain.
a_vol_anno_gene <- c(
  "APOBEC3A", "CDKN1A", "CDC20", "GADD45A", "IL8", "CCL20", "MDM2", "PLK1",
  "SAPCD2", "NDRG1", "ATF3", "CA9", "EGLN3", "REG1A", "BTG2", "CCND1",
  "GADD153", "DDIT4", "BBC3"
)
#a_vol_anno_gene=c("MGMT")
a_vol_df %>% filter(gene_id == "CEBPZ")
a_anno_gene <- c(
  "APOBEC3A", "CDKN1A", "CDC20", "GADD45A", "IL8", "CCL20", "IFIT1", "IFIT3",
  "PLK1"
)
a_anno_gene2 <- c(
  "AREG", "FAM72C", "FAM72D", "EREG", "CCNB1", "ANO1", "FGFBP1", "HIST1H3B",
  "PLK1", "CDC20", "KIF14", "CASP14", "EGLN3", "TREM1", "CA9", "ANGPTL4",
  "SERPING1", "ADM", "LOX", "SLC2A3", "SERPINE1", "ABCA12", "REG1A", "ABCA12",
  "ATF3"
)

# Look-ups of two hand-picked gene sets in the volcano table (inspection only).
a_vol_df %>%
  filter(gene_id %in% c(
    "PTEN", "SCD", "MLH1", "SESN1", "DDIT4", "PLK3", "GADD45A", "DDB2",
    "NDRG1", "EIF2AK3", "EEF1A1", "SLC38A2", "RPS27L", "FDXR", "CCNG2", "RB1",
    "CDKAL1", "SERTAD1", "SPAG9", "SERPINB5", "UBP1", "PRDM1", "DKK1", "FOS",
    "BBC3", "TNFRSF10B", "FAS", "CHMP4C", "EEA1", "ARHGAP5", "VIM", "OSBP",
    "PTK2", "KITLG", "CAV1", "EEA1", "CHMP4C", "IER5", "DTWD1", "PPM2C"
  )) %>%
  arrange(padj)
a_vol_df %>%
  filter(gene_id %in% c(
    "BCAS3", "NOS3", "BNIP3L", "HSP90AB1", "HSP8", "SERPINE1", "SCN3B",
    "BCL2L14", "APAF1", "GPR39", "DUSP5", "CBLC", "DNMT1", "VDR", "CALD1",
    "ANK1", "PPFIBP1", "LGALS3"
  ))

a_anno_fin_gene <- c(
  "APOBEC3A", "CA9", "ATF3", "ABCA12", "REG1A", "NDRG1", "EGLN3", "CDKN1A",
  "ANO1", "PLK1", "CDC20", "CCNB1", "CCL20", "IL8", "BBC3", "GADD45A"
)
atf_gene <- c(
  "NOS3", "BNIP3L", "SERPINE1", "DDIT4", "NDRG1", "DDB2", "GADD45A", "FDXR",
  "CCNG2", "PRDM1", "BBC3", "ANK1"
)
# ---- A3A volcano plots ------------------------------------------------------
# Changes from the previous version: the trailing commas that left an empty
# last argument in theme() and geom_label_repel() are gone (an empty argument
# is passed on as "missing" and is only tolerated by some package versions),
# and `limits` is spelled out instead of relying on partial matching of
# `limit`.

# a_vol: points coloured by direction, dashed guides at log2FC = +/-1 and
# logp = 2, labels for the genes in a_vol_anno_gene.
a_vol <- ggplot(a_vol_df, aes(x = log2FoldChange, y = logp, col = dir)) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 40),
    legend.title = element_blank(),
    legend.text = element_text(size = 30),
    axis.ticks.y = element_line(size = 3),
    axis.ticks.x = element_line(size = 2),
    axis.ticks.length = unit(.25, "cm")
    #        legend.position="top"
  ) +
  scale_x_continuous(limits = c(-10, 10)) +
  scale_colour_manual(
    values = c("#4db7f7", "#c54242"),
    limits = c("decrease", "increase")
  ) +
  #geom_label_repel(aes(label=ifelse((gene_id%in%a_anno_gene|logp>150)&abs(log2FoldChange)>1.5,gene_id,"")),
  geom_label_repel(
    aes(label = ifelse(gene_id %in% a_vol_anno_gene, gene_id, "")),
    max.overlaps = 1000, min.segment.length = 0.1, vjust = 1,
    show.legend = FALSE,
    segment.color = "grey50",
    size = 5
  ) +
  scale_y_continuous(limits = c(0, 330)) +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  ylab(expression(-log[10] * "(adj.Pval)")) +
  xlab(expression(log[2] * "FoldChange"))

a_vol_df %>% arrange(-logp) %>% print(n = 50)

# a_vol2: same layout, but labels the union of atf_gene and a_anno_fin_gene,
# with a larger overlap budget and a y axis up to 350.
a_vol2 <- ggplot(a_vol_df, aes(x = log2FoldChange, y = logp, col = dir)) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 40),
    legend.title = element_blank(),
    legend.text = element_text(size = 30),
    axis.ticks.y = element_line(size = 3),
    axis.ticks.x = element_line(size = 2),
    axis.ticks.length = unit(.25, "cm")
    #        legend.position="top"
  ) +
  scale_x_continuous(limits = c(-10, 10)) +
  scale_colour_manual(
    values = c("#4db7f7", "#c54242"),
    limits = c("decrease", "increase")
  ) +
  geom_label_repel(
    aes(label = ifelse(
      gene_id %in% atf_gene | gene_id %in% a_anno_fin_gene, gene_id, ""
    )),
    max.overlaps = 10000, min.segment.length = 0.1, vjust = 1,
    show.legend = FALSE,
    segment.color = "grey50",
    size = 5
  ) +
  scale_y_continuous(limits = c(0, 350)) +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  ylab(expression(-log[10] * "(adj.Pval)")) +
  xlab(expression(log[2] * "FoldChange"))

a_vol2
a_vol
a_vol_df
# ---- A3A interactive volcano (plotly) ---------------------------------------
# Plain version of the volcano plot used for the HTML widget; `text = gene_id`
# is mapped globally so that it travels with the plot into ggplotly().
#
# Two things were removed from the previous version without changing t_vol:
#  * `aes(labels = gene_id)` on geom_point(): `labels` is not an aesthetic of
#    geom_point and was only reported as unknown.
#  * the theme/scale chain that followed the commented-out `#+`. Because the
#    `+` was commented out, that chain was never part of t_vol; it was a
#    separate top-level expression that tried to add scales to a theme object
#    and failed when the script was run in order.
t_vol <- ggplot(
  a_vol_df,
  aes(x = log2FoldChange, y = logp, col = dir, text = gene_id)
) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black")
t_vol

library(plotly)
ggplotly(t_vol)
a_vol %>% ggplotly()
t_vol

# Save the widget, then close the graphics device opened by the prints above.
htmlwidgets::saveWidget(
  ggplotly(t_vol),
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/volcano_A3A.html"
)
dev.off()


# ---- A3A: write volcano PDFs and the DESeq2 table ---------------------------
# The plots are print()ed explicitly: a bare `a_vol` is only drawn by
# auto-printing at top level, so under source() the PDFs would come out empty.
pdf(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/volcano.A3A.pdf",
  width = 15, height = 10
)
print(a_vol)
dev.off()

pdf(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/volcano.A3A.v2.pdf",
  width = 15, height = 10
)
print(a_vol2)
dev.off()

# Tab-separated dump of the volcano table (no quoting, no row names).
write.table(
  a_vol_df,
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/A3A_DEseq2.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# ---- A3A heatmap input: z-scored rlog matrix of the top-500 genes -----------
# Previously `mat_test` was filtered and `gene_order` derived from it *before*
# `mat_test` was created for A3A, and an undefined `t_df` was referenced, so a
# clean run stopped here (or silently reused a matrix left over from an
# earlier session). The matrix is now built first and the ordering derived
# from it afterwards.

#total assay for plotting
cond
vol_meta <- metadata[metadata$dose == cond, ]
vol_meta <- vol_meta[vol_meta$type == sample, ]
vol_meta <- vol_meta[vol_meta$time == fac2 | vol_meta$time == fac1, ]
vol_meta
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
vol_files_to_read
vol_txi.rsem <- tximport(
  vol_files_to_read,
  type = "rsem", txIn = FALSE, txOut = FALSE
)

# Keep rows whose name is a known "<V10>_<V14>" id from the GTF table.
vol_txi.rsem$abundance <- vol_txi.rsem$abundance[rownames(vol_txi.rsem$abundance) %in% gtf_ss_df$id, ]
vol_txi.rsem$counts <- vol_txi.rsem$counts[rownames(vol_txi.rsem$counts) %in% gtf_ss_df$id, ]
vol_txi.rsem$length <- vol_txi.rsem$length[rownames(vol_txi.rsem$length) %in% gtf_ss_df$id, ]
rownames(vol_txi.rsem$abundance)
# Zero lengths are replaced by 1 before the matrices are handed to DESeq2.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1
# Strip the leading "ENSG<digits>_" from every row name.
rownames(vol_txi.rsem$abundance) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$abundance))
rownames(vol_txi.rsem$counts) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$counts))
rownames(vol_txi.rsem$length) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$length))

vol_dds <- DESeqDataSetFromTximport(
  vol_txi.rsem,
  colData = vol_meta,
  design = ~time
)
vol_dds <- DESeq(vol_dds)
results(vol_dds) %>%
  as_tibble() %>%
  mutate(gene_id = rownames(results(vol_dds))) %>%
  filter(padj < 0.05)

# Row-wise z-scores of the rlog-transformed data, restricted to the genes
# selected earlier in top500_df.
mat_test <- t(scale(t(assay(rlog(vol_dds)))))
mat_test <- mat_test[rownames(mat_test) %in% top500_df$gene_id, ]
mat_test

# Row order for the heatmap: genes sorted by decreasing median z-score of the
# samples assigned to the "48h" group. All sample columns are gathered
# (`-gene`) instead of the former hard-coded 1:6.
# NOTE(review): a sample is put in the "0h" group only when its id contains
# "Ctrl"; the A3B section matches "0h" instead - confirm which token the A3A
# sample ids actually carry.
gene_order <- mat_test %>%
  as.data.frame() %>%
  as_tibble() %>%
  mutate(gene = rownames(mat_test)) %>%
  gather(id, score, -gene) %>%
  mutate(group = ifelse(grepl("Ctrl", id), "0h", "48h")) %>%
  group_by(gene, group) %>%
  dplyr::summarise(mean_score = median(score)) %>%
  ungroup() %>%
  filter(group == "48h") %>%
  arrange(-mean_score)
rownames(mat_test)
#anno_gene=c("APOBEC3A","CDKN1A","CXCL1","PTGS2","CXCL2","CXCL5","MYC","CCL20","IL1R1","CDKN2B","MDM2","CDC20")
# ---- A3A heatmap -------------------------------------------------------------
anno_gene <- c(
  "APOBEC3A", "CDKN1A", "CXCL1", "PTGS2", "CXCL2", "CXCL5", "MYC", "CCL20",
  "IL1R1", "CDKN2B", "MDM2", "CDC20"
)
#anno_gene<-c("APOBEC3B","CXCL6","CCL20","TNFAIP3","CXCL1","CXCL3","PTGS2","CXCL2","CXCL5","IL17RB","MUC5AC","NFKBIA","LCN2","CCL2","FOSB","CDKN1A")
a_anno_gene <- c(
  "APOBEC3A", "CDKN1A", "GADD45A", "IL8", "CXCL1", "CCL20", "MX1", "IFIT1",
  "IFIT3"
)

# Row marks for the genes of a_anno_gene that are present in the matrix.
# Positions and labels are now taken from the same filtered vector; before,
# genes missing from the matrix were dropped from `at` but kept in `labels`,
# which left the two out of step. (An earlier `ha` built from anno_gene was
# overwritten before use and has been removed.)
a_anno_present <- a_anno_gene[a_anno_gene %in% rownames(mat_test)]
ha <- rowAnnotation(
  foo = anno_mark(
    at = match(a_anno_present, rownames(mat_test)),
    labels = a_anno_present,
    labels_gp = gpar(col = "black", fontsize = 40),
    link_width = unit(15, "mm"),
    link_gp = gpar(col = "black", lwd = 2)
  )
)

a_anno_gene
mat_test
colnames(mat_test)

# No clustering: columns are shown as 4-6 followed by 1-3, rows follow
# gene_order. colorRamp2 is called through its namespace because circlize is
# only attached further down in this file.
ha1 <- Heatmap(
  mat_test,
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  column_order = c(4:6, 1:3),
  name = "Z-score",
  row_order = match(gene_order$gene, rownames(mat_test)),
  right_annotation = ha,
  show_row_names = FALSE,
  show_column_names = FALSE,
  col = circlize::colorRamp2(c(-2, 0, 2), c("#0b67ea", "white", "#ba3939")),
  heatmap_legend_param = list(
    legend_direction = "horizontal",
    legend_width = unit(3, "cm"),
    legend_height = unit(10, "cm"),
    title_gp = gpar(fontsize = 20, fontface = "bold"),
    labels_gp = gpar(fontsize = 15),
    title_position = "topleft",
    grid_height = unit(10, "mm"),
    grid_width = unit(20, "mm")
  )
)
min(mat_test)

# Draw once on the current device (legend at the bottom), close that device,
# then redraw into the PDF. draw() is called explicitly because a bare
# `a_ha1` is only rendered by auto-printing at top level.
a_ha1 <- draw(ha1, heatmap_legend_side = "bottom")
dev.off()
pdf(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/heatmap.A3A.pdf",
  width = 10, height = 10
)
draw(a_ha1)
dev.off()
rownames(mat_test)
rownames(m)


##fgsea##
##########



# ---- A3A: fgsea on the |log2FC| > 1 top-500 genes ---------------------------
pathways <- gmtPathways("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/10_DEG/c5.go.bp.v7.4.symbols.gmt")

# Input gene table. An immediately overwritten `a_df <- top500_df` and a
# look-up on `a_df` that ran before `a_df` existed have been cleaned up; the
# look-up now follows the assignment.
a_df <- a_top500_edit_df
a_df %>% filter(gene_id == "APOBEC3B")
a_df %>% filter(padj > 0.05)

# Gene-level statistic: log2 fold change, named by gene.
ranks_a <- a_df$log2FoldChange
names(ranks_a) <- a_df$gene_id

fgseaRes <- fgsea(pathways, ranks_a, minSize = 10, maxSize = 500)

# Pathways with padj < 0.05, sorted by decreasing padj (so the most
# significant ones are last); `size` is renamed to `count` and the pathway
# factor levels are fixed in that order for plotting.
a_fgseaRes_sum <- fgseaRes %>%
  arrange(-padj) %>%
  filter(padj < 0.05) %>%
  as_tibble() %>%
  plyr::rename(c("size" = "count"))
a_fgseaRes_sum$pathway <- factor(
  a_fgseaRes_sum$pathway,
  levels = a_fgseaRes_sum$pathway
)
a_fgseaRes_sum %>% tail(30) %>% print(n = 30)

# Dot plot of five selected pathways (adjusted p-value on the value axis,
# point size = gene count). An earlier plot of `tail(a_fgseaRes_sum, 10)` was
# assigned to the same name and overwritten here before use, so it was
# dropped.
a_p <- a_fgseaRes_sum %>%
  filter(pathway %in% c(
    "GOBP_CELL_DIVISION",
    "GOBP_CHROMOSOME_SEGREGATION",
    "GOBP_MITOTIC_CELL_CYCLE",
    "GOBP_DEFENSE_RESPONSE",
    "GOBP_APOPTOTIC_SIGNALING_PATHWAY"
  )) %>%
  ggplot(aes(x = pathway, y = padj, size = count)) +
  geom_point() +
  coord_flip() +
  theme_bw() +
  theme(
    axis.title = element_text(size = 40),
    axis.text.x = element_text(size = 20),
    axis.text.y = element_blank(),
    legend.title = element_text(size = 20),
    legend.text = element_text(size = 15)
  ) +
  ylab("adj.p-value") +
  scale_y_continuous(
    breaks = seq(0, 0.05, by = 0.01),
    limits = c(0, 0.05)
  )
a_p

# The plot is passed explicitly; without it ggsave() writes whatever plot was
# displayed last, which is not guaranteed to be a_p.
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/gsea.A3A.pdf",
  plot = a_p
)

# ---- A3A: fgsea on the 500 best-padj genes (no variance / fold filter) ------
# Significant DESeq2 rows, best padj first, at most 500.
t_a_df <- results(vol_dds) %>%
  as.tibble() %>%
  mutate("gene_id" = rownames(results(vol_dds))) %>%
  filter(padj < 0.05) %>%
  arrange(padj) %>%
  head(500)

t_a_df$gene_id[1:500]

# log2 fold changes named by gene.
t_rank_a <- setNames(t_a_df$log2FoldChange, t_a_df$gene_id)
t_a_df

tfgseaRes <- fgsea(pathways, t_rank_a, minSize = 10, maxSize = 500)

# Pathways with padj < 0.05 in decreasing padj order; `size` becomes `count`.
tfgseaRes_sum <- tfgseaRes %>%
  arrange(-padj) %>%
  filter(padj < 0.05) %>%
  as.tibble() %>%
  plyr::rename(c("size" = "count"))
print(tail(tfgseaRes_sum, 30), n = 100)

# Leading-edge genes of one pathway from the filtered A3A run.
(a_fgseaRes_sum %>%
  filter(pathway == "GOBP_APOPTOTIC_SIGNALING_PATHWAY"))$leadingEdge
#### A3B#########
##################



### 1. A3B: top 500 genes ##########
##################################


# ---- A3B: assemble the DESeq2 data set for the 48h-vs-0h contrast ----------
# Contrast settings.
i <- "48h"

cond <- "3ug"
sample <- "A3B"
#sample <- "A3A"
fac1 <- "0h"
fac2 <- i

# Narrow the sample sheet: chosen dose or time 0h, chosen type, and the two
# time points of the contrast.
vol_meta <- metadata[metadata$dose == cond | metadata$time == "0h", ]
vol_meta <- vol_meta[vol_meta$type == sample, ]
vol_meta <- vol_meta[vol_meta$time == fac2 | vol_meta$time == fac1, ]
vol_meta

# Import only the result files whose names appear in the filtered sheet.
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
vol_files_to_read
vol_txi.rsem <- tximport(
  vol_files_to_read,
  type = "rsem",
  txIn = FALSE,
  txOut = FALSE
)

# Keep, in each of the three matrices, only rows whose name is a known
# "<V10>_<V14>" id from the GTF table.
vol_txi.rsem[c("abundance", "counts", "length")] <- lapply(
  vol_txi.rsem[c("abundance", "counts", "length")],
  function(mat) mat[rownames(mat) %in% gtf_ss_df$id, ]
)
rownames(vol_txi.rsem$abundance)

# Zero lengths are replaced by 1 before the matrices are handed to DESeq2.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

# Strip the leading "ENSG<digits>_" from every row name.
vol_txi.rsem[c("abundance", "counts", "length")] <- lapply(
  vol_txi.rsem[c("abundance", "counts", "length")],
  function(mat) {
    rownames(mat) <- gsub("ENSG[0-9]*_", "", rownames(mat))
    mat
  }
)

# Fit the model with `time` as the only design term.
vol_dds <- DESeqDataSetFromTximport(
  vol_txi.rsem,
  colData = vol_meta,
  design = ~time
)
vol_dds <- DESeq(vol_dds)
#vol_res<-results(vol_dds)


# ---- A3B: mean / CV^2 fit and over-dispersed gene list ----------------------
# Divide every column of the count matrix by its size factor.
lib.size <- estimateSizeFactorsForMatrix(vol_txi.rsem$counts)
ed <- sweep(vol_txi.rsem$counts, 2, lib.size, "/")

# Per-gene mean, variance and squared coefficient of variation.
means <- rowMeans(ed)
vars <- apply(ed, 1, var)
cv2 <- vars / means^2

smoothScatter(log(means), log(cv2))

# Fit cv2 ~ a0 + a1tilde / mean with a gamma GLM, using only genes whose mean
# is at least the 95th percentile of the means of genes with cv2 > 0.3.
minMeanForFit <- unname(quantile(means[which(cv2 > .3)], .95))
useForFit <- means >= minMeanForFit # & spikeins
fit <- glmgam.fit(
  cbind(a0 = 1, a1tilde = 1 / means[useForFit]),
  cv2[useForFit]
)
a0 <- fit$coefficients[["a0"]]
a1 <- fit$coefficients[["a1tilde"]]
fit$coefficients

# Close the current device and redraw the scatter with tighter margins.
dev.off()
par(mar = c(3.5, 3.5, 1, 1), mgp = c(2, 0.65, 0), cex = 0.9)
smoothScatter(log(means), log(cv2))

# Fitted curve on a log-spaced grid of means.
xg <- exp(seq(min(log(means[means > 0])), max(log(means)), length.out = 1000))
vfit <- a1 / xg + a0
lines(log(xg), log(vfit), col = "black", lwd = 3)

# Dashed band from the 2.5% / 97.5% chi-square quantiles.
df <- ncol(ed) - 1
lines(log(xg), log(vfit * qchisq(0.975, df) / df), lty = 2, col = "black")
lines(log(xg), log(vfit * qchisq(0.025, df) / df), lty = 2, col = "black")

# Ratio of observed variance to the fitted variance, genes sorted by it.
afit <- a1 / means + a0
varFitRatio <- vars / (afit * means^2)
varorder <- order(varFitRatio, decreasing = TRUE)
oed <- ed[varorder, ]
oed
# Highlight the 500 genes with the largest ratio.
points(
  log(means[varorder[1:500]]),
  log(cv2[varorder[1:500]]),
  col = 2
)
varFitRatio

# Chi-square upper-tail p-value per gene, FDR-adjusted. The tabulated flag
# uses 0.001, while the gene table below keeps adjusted p < 0.05.
pval <- pchisq(varFitRatio * df, df = df, lower.tail = FALSE)
adj.pval <- p.adjust(pval, "fdr")
sigVariedGenes <- adj.pval < 0.001
table(sigVariedGenes)

sig_gene_df <- data.frame(
  "gene_id" = names(adj.pval),
  "adj.pval" = adj.pval
) %>%
  filter(adj.pval < 0.05) %>%
  arrange(adj.pval)
sig_gene_df
sig_gene_top500_df <- head(sig_gene_df, 500)
table(sigVariedGenes)
sigVariedGenes
oed

# First 500 rows of the ratio-ordered normalised matrix.
m <- oed[1:500, ]
m
apply(m, 1, max)
m
as.data.frame(m)

write.table(
  sig_gene_top500_df,
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/top500_A3B.3ug.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
m

# NOTE(review): this reads "top500_A3B.txt", not the "top500_A3B.3ug.txt"
# file written just above - confirm that a separate file is intended.
m_a <- read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/top500_A3B.txt")
m_a

# Row-wise z-scores of the assay stored in vol_dds.
mat_test <- t(scale(t(assay(vol_dds))))
mat_test[rownames(mat_test) == "APOBEC3B", ]
vol_meta
metadata
m


# ---- A3B: top-500 tables and heatmap row order -------------------------------
# Significant DESeq2 rows (padj < 0.05) that are also in the over-dispersed
# list, best padj first, at most 500. as_tibble() replaces the deprecated
# as.tibble().
top500_df <- results(vol_dds) %>%
  as_tibble() %>%
  mutate(gene_id = rownames(results(vol_dds))) %>%
  filter(padj < 0.05) %>%
  filter(gene_id %in% sig_gene_df$gene_id) %>%
  arrange(padj) %>%
  head(500)

# Same, additionally requiring |log2FoldChange| > 1.
b_top500_edit_df <- results(vol_dds) %>%
  as_tibble() %>%
  mutate(gene_id = rownames(results(vol_dds))) %>%
  filter(abs(log2FoldChange) > 1) %>%
  filter(padj < 0.05) %>%
  filter(gene_id %in% sig_gene_df$gene_id) %>%
  arrange(padj) %>%
  head(500)

# Restrict the z-score matrix to those genes. (A reference to `t_df`, which is
# never defined in this file, used to follow here and stopped the run.)
mat_test <- mat_test[rownames(mat_test) %in% top500_df$gene_id, ]
mat_test

# Row order: genes sorted by decreasing median z-score of the samples whose id
# does not contain "0h". All sample columns are gathered (`-gene`) rather than
# the hard-coded 1:6.
gene_order <- mat_test %>%
  as.data.frame() %>%
  as_tibble() %>%
  mutate(gene = rownames(mat_test)) %>%
  gather(id, score, -gene) %>%
  mutate(group = ifelse(grepl("0h", id), "0h", "48h")) %>%
  group_by(gene, group) %>%
  dplyr::summarise(mean_score = median(score)) %>%
  ungroup() %>%
  filter(group == "48h") %>%
  arrange(-mean_score)

# Rank of APOBEC3B in that order. row_number() replaces a literal 1:498,
# which failed whenever the table did not have exactly 498 rows.
gene_order %>%
  mutate(order = row_number()) %>%
  filter(gene == "APOBEC3B")


#total assay for plotting
# ---- A3B heatmap input: z-scored rlog matrix of the top-500 genes -----------
cond

# Sample sheet for plotting: rows at the chosen dose (no separate 0h clause),
# of the chosen type, at either of the two time points.
vol_meta <- metadata[metadata$dose == cond, ]
vol_meta <- vol_meta[vol_meta$type == sample, ]
vol_meta <- vol_meta[vol_meta$time == fac2 | vol_meta$time == fac1, ]
vol_meta

vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
vol_files_to_read
vol_txi.rsem <- tximport(
  vol_files_to_read,
  type = "rsem",
  txIn = FALSE,
  txOut = FALSE
)

# Keep, in each of the three matrices, only rows whose name is a known
# "<V10>_<V14>" id from the GTF table.
vol_txi.rsem[c("abundance", "counts", "length")] <- lapply(
  vol_txi.rsem[c("abundance", "counts", "length")],
  function(mat) mat[rownames(mat) %in% gtf_ss_df$id, ]
)
rownames(vol_txi.rsem$abundance)

# Zero lengths are replaced by 1 before the matrices are handed to DESeq2.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

# Strip the leading "ENSG<digits>_" from every row name.
vol_txi.rsem[c("abundance", "counts", "length")] <- lapply(
  vol_txi.rsem[c("abundance", "counts", "length")],
  function(mat) {
    rownames(mat) <- gsub("ENSG[0-9]*_", "", rownames(mat))
    mat
  }
)

vol_dds <- DESeqDataSetFromTximport(
  vol_txi.rsem,
  colData = vol_meta,
  design = ~time
)
vol_dds <- DESeq(vol_dds)

# Inspection: significant rows of the refitted model.
results(vol_dds) %>%
  as.tibble() %>%
  mutate("gene_id" = rownames(results(vol_dds))) %>%
  filter(padj < 0.05)

# Row-wise z-scores of the rlog-transformed data, restricted to the genes in
# top500_df.
mat_test <- t(scale(t(assay(rlog(vol_dds)))))
mat_test <- mat_test[rownames(mat_test) %in% top500_df$gene_id, ]
mat_test
rownames(mat_test)
# ---- A3B heatmap -------------------------------------------------------------
# circlize provides colorRamp2(), used for the colour scale below.
library(circlize)

# Genes to mark on the right of the heatmap. The trailing comma that used to
# end this vector left an empty last argument, which c() rejects.
b_anno_gene <- c(
  "APOBEC3B", "CDKN1A", "GADD45A", "IL8", "CXCL1", "CCL20", "MX1", "IFIT1",
  "IFIT3"
)

# Positions and labels are taken from the same filtered vector; before, genes
# missing from the matrix were dropped from `at` but kept in `labels`, which
# left the two out of step. (A first, bold-faced `ha` that was overwritten
# before use has been removed.)
b_anno_present <- b_anno_gene[b_anno_gene %in% rownames(mat_test)]
ha <- rowAnnotation(
  foo = anno_mark(
    at = match(b_anno_present, rownames(mat_test)),
    labels = b_anno_present,
    labels_gp = gpar(col = "black", fontsize = 40),
    link_width = unit(15, "mm"),
    link_gp = gpar(col = "black", lwd = 2)
  )
)

mat_test
colnames(mat_test)

# No clustering: columns in their stored order (1-6), rows follow gene_order.
# The former `col_fun = c("Orange", "white", "black")` entry was removed from
# heatmap_legend_param: it was a character vector, not a colour-mapping
# function, and the heatmap colours are already defined by `col`.
ha1 <- Heatmap(
  mat_test,
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  column_order = 1:6,
  name = "Z-score",
  row_order = match(gene_order$gene, rownames(mat_test)),
  right_annotation = ha,
  show_row_names = FALSE,
  show_column_names = FALSE,
  col = colorRamp2(c(-2, 0, 2), c("#0b67ea", "white", "#ba3939")),
  heatmap_legend_param = list(
    legend_direction = "horizontal",
    legend_width = unit(3, "cm"),
    legend_height = unit(10, "cm"),
    title_gp = gpar(fontsize = 20, fontface = "bold"),
    labels_gp = gpar(fontsize = 15),
    title_position = "topleft",
    grid_height = unit(10, "mm"),
    grid_width = unit(20, "mm")
  )
)

# Draw once on the current device (it used to be rendered three times in a
# row), close that device, then redraw into the PDF. draw() is called
# explicitly because a bare `b_ha1` is only rendered by auto-printing at top
# level.
b_ha1 <- draw(ha1, heatmap_legend_side = "bottom")
rownames(mat_test)
rownames(m)
dev.off()
pdf(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/heatmap.A3B.pdf",
  width = 10, height = 10
)
draw(b_ha1)
dev.off()

# ---- A3B volcano table --------------------------------------------------------
# DESeq2 results with gene names, -log10(padj) capped at 300 (this also maps
# Inf to 300). The APOBEC3B look-up used to run *before* b_vol_df was created
# and failed on a clean run; it now follows the assignment.
b_vol_df <- results(vol_dds) %>%
  as_tibble() %>%
  mutate(gene_id = rownames(results(vol_dds))) %>%
  mutate(logp = pmin(-log10(padj), 300))
b_vol_df %>% filter(gene_id == "APOBEC3B")
b_vol_df %>% filter(log2FoldChange < (-1))

# Direction flag for points beyond |log2FC| > 1 with logp > 2; NA otherwise.
b_vol_df <- b_vol_df %>%
  mutate(dir = ifelse(
    log2FoldChange > 1 & logp > 2, "increase",
    ifelse(log2FoldChange < (-1) & logp > 2, "decrease", NA)
  ))

# Gene lists used to label the A3B volcano plots.
anno_gene <- c(
  "APOBEC3B", "CXCL6", "CCL20", "TNFAIP3", "CXCL1", "CXCL3", "PTGS2", "CXCL2",
  "CXCL5", "IL17RB", "MUC5AC", "NFKBIA", "LCN2", "CCL2", "FOSB", "CDKN1A"
)
b_vol_anno_gene <- c(
  "APOBEC3B", "YARS", "CCL20", "CDKN1A", "IL8", "TNFAIP2", "CXCL2", "ADRA2A",
  "ICAM1", "MUC13", "ATF3", "TGM2", "ANXA10"
)
b_vol_df %>% filter(gene_id == "VIM")
# ---- A3B volcano plots -------------------------------------------------------
# Changes from the previous version: the trailing comma that left an empty
# last argument in theme() is gone (an empty argument is passed on as
# "missing" and is only tolerated by some package versions), and `limits` is
# spelled out instead of relying on partial matching of `limit`.

# b_vol: points coloured by direction, dashed guides at log2FC = +/-1 and
# logp = 2, labels for the genes in b_vol_anno_gene.
b_vol <- ggplot(b_vol_df, aes(x = log2FoldChange, y = logp, col = dir)) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 40),
    legend.title = element_blank(),
    legend.text = element_text(size = 30),
    axis.ticks.y = element_line(size = 3),
    axis.ticks.x = element_line(size = 2),
    axis.ticks.length = unit(.25, "cm")
    #        legend.position="top"
  ) +
  scale_x_continuous(limits = c(-11, 11)) +
  scale_colour_manual(
    values = c("#4db7f7", "#c54242"),
    limits = c("decrease", "increase")
  ) +
  #geom_label_repel(aes(label=ifelse(logp>150&(abs(log2FoldChange)>1.5),gene_id,"")),
  geom_label_repel(
    aes(label = ifelse(gene_id %in% b_vol_anno_gene, gene_id, "")),
    max.overlaps = 1000, min.segment.length = 0.1, vjust = 1,
    show.legend = FALSE,
    segment.color = "grey50",
    size = 5
  ) +
  scale_y_continuous(limits = c(0, 350)) +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  ylab(expression(-log[10] * "(adj.Pval)")) +
  xlab(expression(log[2] * "FoldChange"))
b_vol

b_vol_df %>% arrange(-logp) %>% print(n = 30)

# b_vol2: same layout, but labels the union of anno_gene and b_vol_anno_gene,
# with a larger overlap budget and a y axis up to 310.
b_vol2 <- ggplot(b_vol_df, aes(x = log2FoldChange, y = logp, col = dir)) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 40),
    legend.title = element_blank(),
    legend.text = element_text(size = 30),
    axis.ticks.y = element_line(size = 3),
    axis.ticks.x = element_line(size = 2),
    axis.ticks.length = unit(.25, "cm")
    #        legend.position="top"
  ) +
  scale_x_continuous(limits = c(-11, 11)) +
  scale_colour_manual(
    values = c("#4db7f7", "#c54242"),
    limits = c("decrease", "increase")
  ) +
  geom_label_repel(
    aes(label = ifelse(
      gene_id %in% anno_gene | gene_id %in% b_vol_anno_gene, gene_id, ""
    )),
    max.overlaps = 12000, min.segment.length = 0.1, vjust = 1,
    show.legend = FALSE,
    segment.color = "grey50",
    size = 5
  ) +
  scale_y_continuous(limits = c(0, 310)) +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  ylab(expression(-log[10] * "(adj.Pval)")) +
  xlab(expression(log[2] * "FoldChange"))

# ---- A3B interactive volcano (plotly) ----------------------------------------
# Plain version of the volcano plot used for the HTML widget; `label = gene_id`
# is mapped globally so that it travels with the plot into ggplotly().
#
# The theme/scale/label chain that followed the commented-out `#+` has been
# removed. Because the `+` was commented out, that chain was never part of
# t_b_vol; it was a separate top-level expression that tried to add scales to
# a theme object and failed when the script was run in order.
t_b_vol <- ggplot(
  b_vol_df,
  aes(x = log2FoldChange, y = logp, col = dir, label = gene_id)
) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black")

b_vol
ggplotly(t_b_vol)
htmlwidgets::saveWidget(
  ggplotly(t_b_vol),
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/volcano_A3B.html"
)

# The A3A widget is re-saved from `t_vol`, the interactive A3A plot built
# earlier in this file. The previous code referred to `t_a_vol`, which is
# never defined.
htmlwidgets::saveWidget(
  ggplotly(t_vol),
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/volcano_A3A.html"
)
# ---- A3B: write volcano PDFs and the DESeq2 table ----------------------------
# The plots are print()ed explicitly: a bare `b_vol` is only drawn by
# auto-printing at top level, so under source() the PDFs would come out empty.
pdf(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/volcano.A3B.pdf",
  width = 15, height = 10
)
print(b_vol)
dev.off()

# NOTE(review): unlike the A3A counterpart ("volcano.A3A.v2.pdf"), this file
# name carries no construct tag - confirm the name is intended.
pdf(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/volcano.v2.pdf",
  width = 15, height = 10
)
print(b_vol2)
dev.off()

# Tab-separated dump of the volcano table (no quoting, no row names).
write.table(
  b_vol_df,
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/A3B_DEseq2.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
top500_d
##fgsea##
##########



# Gene-set enrichment (fgsea) against the GO Biological Process collection,
# ranking genes by log2 fold change, followed by a dot plot of the ten
# significant pathways with the smallest adjusted p-values.
pathways <- gmtPathways("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/10_DEG/c5.go.bp.v7.4.symbols.gmt")

# Input table for the ranking. The previous version first assigned
# `top500_df` and immediately overwrote it; only the effective assignment is
# kept. Swap in the commented line to rank the other table instead.
#a_df <- top500_df
a_df <- b_top500_edit_df
a_df %>% filter(padj > 0.05)  # inspection only: rows above the 0.05 cut-off

ranks_a <- a_df$log2FoldChange
names(ranks_a) <- a_df$gene_id

fgseaRes <- fgsea(pathways, ranks_a, minSize = 10, maxSize = 500)

# Significant pathways, sorted by decreasing padj so that tail() below returns
# the most significant ones. as_tibble() replaces the deprecated as.tibble().
b_fgseaRes_sum <- fgseaRes %>%
  arrange(-padj) %>%
  filter(padj < 0.05) %>%
  as_tibble() %>%
  plyr::rename(c("size" = "count"))
b_fgseaRes_sum <- b_fgseaRes_sum %>% mutate(pathway = gsub("GOBP_", "", pathway))
# Freeze the current row order as the factor order used on the plot axis.
b_fgseaRes_sum$pathway <- factor(b_fgseaRes_sum$pathway, levels = b_fgseaRes_sum$pathway)

p <- b_fgseaRes_sum %>%
  tail(10) %>%
  ggplot(aes(x = pathway, y = padj, size = count)) +
  geom_point() +
  coord_flip() +
  theme_bw() +
  theme(axis.title = element_text(size = 30),
        axis.text.x = element_text(size = 10),
        #axis.text.y = element_text(size = 10),
        axis.text.y = element_blank(),
        legend.title = element_text(size = 20),
        legend.text = element_text(size = 15)) +
  ylab("adj.p-value") +
  # `limits` is spelled out instead of relying on partial matching of `limit`.
  scale_y_continuous(breaks = seq(0, 0.05, by = 0.01),
                     limits = c(0, 0.05))
p
#dev.off()
ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig1/gsea.A3B.pdf", p)


# GSEA on the (at most) 500 most significant genes (padj < 0.05) of the current
# `vol_dds` fit, ranked by log2 fold change.
# as_tibble() replaces the deprecated as.tibble().
t_b_df <- results(vol_dds) %>%
  as_tibble() %>%
  mutate(gene_id = rownames(results(vol_dds))) %>%
  filter(padj < 0.05) %>%
  arrange(padj) %>%
  head(500)

# head() instead of `[1:500]` so that fewer than 500 hits are not padded with NA.
head(t_b_df$gene_id, 500)

t_rank_b <- t_b_df$log2FoldChange
names(t_rank_b) <- t_b_df$gene_id
t_b_df

tfgseaRes <- fgsea(pathways, t_rank_b, minSize = 10, maxSize = 500)
tfgseaRes_sum <- tfgseaRes %>%
  arrange(-padj) %>%
  filter(padj < 0.05) %>%
  as_tibble() %>%
  plyr::rename(c("size" = "count"))
# Print the 30 most significant pathways once (the table used to be piped
# through print() twice, which printed it twice).
tfgseaRes_sum %>% tail(30) %>% print(n = 100)
# Leading-edge genes of one pathway. NOTE(review): `a_fgseaRes_sum` is not
# created in this part of the script -- confirm it exists and still carries the
# "GOBP_" prefix in `pathway`.
(a_fgseaRes_sum %>% filter(pathway == "GOBP_APOPTOTIC_SIGNALING_PATHWAY"))$leadingEdge



# GSEA restricted to the genes that are rows of `m` (defined outside this
# block), ranked by log2 fold change from the current `vol_dds` fit.
# Changes: as_tibble() replaces the deprecated as.tibble(), and genes without a
# log2 fold change are dropped because fgsea() requires finite statistics.
b_df <- results(vol_dds) %>%
  data.frame() %>%
  mutate(gene_id = rownames(results(vol_dds))) %>%
  as_tibble() %>%
  filter(gene_id %in% rownames(m)) %>%
  filter(!is.na(log2FoldChange)) %>%
  arrange(padj)

ranks_b <- b_df$log2FoldChange
names(ranks_b) <- b_df$gene_id

fgseaRes <- fgsea(pathways, ranks_b, minSize = 10, maxSize = 500)
fgseaRes_sum <- fgseaRes %>%
  arrange(padj) %>%
  filter(padj < 0.05)
# Freeze the padj ordering as the factor order used on the plot axis.
fgseaRes_sum$pathway <- factor(fgseaRes_sum$pathway, levels = fgseaRes_sum$pathway)
fgseaRes_sum %>%
  ggplot(aes(x = pathway, y = padj, size = size)) +
  geom_point() +
  coord_flip()



### 0h compare###
#################




# 0h comparison: DESeq2 fit with design ~type on the 100ng samples of type
# A3A / A3B at time 0h, followed by a volcano plot of the default results().
i <- "48h"

cond <- "100ng"
#sample <- "A3B"
sample <- "A3A"
fac1 <- "0h"
fac2 <- "0h"
vol_meta <- metadata[metadata$dose == cond, ]
vol_meta <- vol_meta[vol_meta$type %in% c("A3A", "A3B"), ]
vol_meta <- vol_meta[vol_meta$time == fac2 | vol_meta$time == fac1, ]
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
# Put the sample sheet in the same order as the imported files; DESeq2 pairs
# colData rows with count columns by position.
vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]
vol_meta
vol_files_to_read
vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)
#txi <- txi.rsem

# Keep only genes listed in `gtf_ss_df$id` and strip the "ENSG..._" prefix from
# the row names, identically for the three matrices.
vol_txi.rsem[c("abundance", "counts", "length")] <- lapply(
  vol_txi.rsem[c("abundance", "counts", "length")],
  function(mat) {
    mat <- mat[rownames(mat) %in% gtf_ss_df$id, , drop = FALSE]
    rownames(mat) <- gsub("ENSG[0-9]*_", "", rownames(mat))
    mat
  }
)
# Replace zero lengths by 1 before building the DESeq2 data set.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1
head(rownames(vol_txi.rsem$abundance))

vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                    colData = vol_meta,
                                    design = ~type)
vol_dds <- DESeq(vol_dds)

# Result table with gene ids and -log10(padj), capped at 300 (this also covers
# Inf from padj == 0; NA stays NA). as_tibble() replaces deprecated as.tibble().
zero_vol_df <- results(vol_dds) %>%
  as_tibble() %>%
  mutate(gene_id = rownames(results(vol_dds))) %>%
  mutate(logp = -log10(padj)) %>%
  mutate(logp = pmin(logp, 300))

# Direction call: |log2FC| > 1 and -log10(padj) > 2, otherwise NA.
zero_vol_df <- zero_vol_df %>%
  mutate(dir = ifelse(log2FoldChange > 1 & logp > 2, "increase",
                      ifelse(log2FoldChange < (-1) & logp > 2, "decrease", NA)))

zero_vol_df %>% filter(gene_id == "APOBEC3B")
ggplot(zero_vol_df, aes(x = log2FoldChange, y = logp, col = dir)) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black") +
  theme_classic() +
  theme(axis.text = element_text(size = 30),
        axis.title = element_text(size = 40),
        legend.title = element_blank(),
        legend.text = element_text(size = 30),
        axis.ticks.y = element_line(size = 3),
        axis.ticks.x = element_line(size = 2),
        axis.ticks.length = unit(.25, "cm")
        #        legend.position="top"
  ) +
  # `limits` is spelled out instead of relying on partial matching of `limit`.
  scale_x_continuous(limits = c(-11, 11)) +
  scale_colour_manual(values = c("#4db7f7", "#c54242"), limits = c("decrease", "increase")) +
  #geom_label_repel(aes(label=ifelse(logp>150&(abs(log2FoldChange)>1.5),gene_id,"")),
  geom_label_repel(aes(label = ifelse(logp > 100, gene_id, "")),
                   max.overlaps = 1000, min.segment.length = 0.1, vjust = 1,
                   show.legend = FALSE,
                   segment.color = 'grey50',
                   size = 5) +
  scale_y_continuous(limits = c(0, 310)) +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  ylab(expression(-log[10]*"(adj.Pval)")) +
  xlab(expression(log[2]*"FoldChange"))


##both compare##
################


### 0h vs 48h compare (A3A + A3B) ###
#####################################




# 0h vs 48h comparison: DESeq2 fit with design ~time on the 100ng samples of
# type A3A / A3B taken together, followed by a volcano plot of the default
# results().
i <- "48h"

cond <- "100ng"
#sample <- "A3B"
sample <- "A3A"
fac1 <- "0h"
fac2 <- "48h"
vol_meta <- metadata[metadata$dose == cond, ]
vol_meta <- vol_meta[vol_meta$type %in% c("A3A", "A3B"), ]
vol_meta <- vol_meta[vol_meta$time == fac2 | vol_meta$time == fac1, ]
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
# Put the sample sheet in the same order as the imported files; DESeq2 pairs
# colData rows with count columns by position.
vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]
vol_meta
vol_files_to_read
vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)
#txi <- txi.rsem

# Keep only genes listed in `gtf_ss_df$id` and strip the "ENSG..._" prefix from
# the row names, identically for the three matrices.
vol_txi.rsem[c("abundance", "counts", "length")] <- lapply(
  vol_txi.rsem[c("abundance", "counts", "length")],
  function(mat) {
    mat <- mat[rownames(mat) %in% gtf_ss_df$id, , drop = FALSE]
    rownames(mat) <- gsub("ENSG[0-9]*_", "", rownames(mat))
    mat
  }
)
# Replace zero lengths by 1 before building the DESeq2 data set.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1
head(rownames(vol_txi.rsem$abundance))

vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                    colData = vol_meta,
                                    design = ~time)
vol_dds <- DESeq(vol_dds)

# Result table with gene ids and -log10(padj), capped at 300 (this also covers
# Inf from padj == 0; NA stays NA). as_tibble() replaces deprecated as.tibble().
both_vol_df <- results(vol_dds) %>%
  as_tibble() %>%
  mutate(gene_id = rownames(results(vol_dds))) %>%
  mutate(logp = -log10(padj)) %>%
  mutate(logp = pmin(logp, 300))

# Direction call: |log2FC| > 1 and -log10(padj) > 2, otherwise NA.
both_vol_df <- both_vol_df %>%
  mutate(dir = ifelse(log2FoldChange > 1 & logp > 2, "increase",
                      ifelse(log2FoldChange < (-1) & logp > 2, "decrease", NA)))

# Fix: this check used to query `zero_vol_df` (the table of the previous
# section) instead of the table that was just built.
both_vol_df %>% filter(gene_id == "APOBEC3B")
ggplot(both_vol_df, aes(x = log2FoldChange, y = logp, col = dir)) +
  geom_point(size = 3) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = -1, linetype = "dashed", colour = "black") +
  geom_hline(yintercept = 2, linetype = "dashed", colour = "black") +
  theme_classic() +
  theme(axis.text = element_text(size = 30),
        axis.title = element_text(size = 40),
        legend.title = element_blank(),
        legend.text = element_text(size = 30),
        axis.ticks.y = element_line(size = 3),
        axis.ticks.x = element_line(size = 2),
        axis.ticks.length = unit(.25, "cm")
        #        legend.position="top"
  ) +
  #scale_x_continuous(limit=c(-11,11))+
  scale_colour_manual(values = c("#4db7f7", "#c54242"), limits = c("decrease", "increase")) +
  #geom_label_repel(aes(label=ifelse(logp>150&(abs(log2FoldChange)>1.5),gene_id,"")),
  geom_label_repel(aes(label = ifelse(logp > 20, gene_id, "")),
                   max.overlaps = 1000, min.segment.length = 0.1, vjust = 1,
                   show.legend = FALSE,
                   segment.color = 'grey50',
                   size = 5) +
  # `limits` is spelled out instead of relying on partial matching of `limit`.
  scale_y_continuous(limits = c(0, 100)) +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  ylab(expression(-log[10]*"(adj.Pval)")) +
  xlab(expression(log[2]*"FoldChange"))


# Exploratory inspection and heatmaps. Relies on `df`, `m` and `ha`, none of
# which is created in this part of the script.

# Print the 0h-vs-48h table sorted by adjusted p-value.
both_vol_df%>%arrange(padj)

# Print the genes of the 0h table that received a direction call.
zero_vol_df%>%filter(!is.na(dir))
# NOTE(review): `re` is not defined in the visible code; this looks like an
# unfinished statement and will stop the script if `re` does not exist.
a_df<-re



# Rows of `df` whose gene is a row name of `m`, sorted by decreasing log2FC.
t_df<-(df%>%filter(gene%in%rownames(m))%>%arrange(-log2FoldChange))
m
t_df$gene<-as.factor(t_df$gene)

# NOTE(review): match(rownames(m), t_df$gene) returns, for every row of `m`,
# its position in `t_df`. If the goal is to put `m` into `t_df` order, the
# arguments would need to be swapped -- confirm the intent.
m[match(rownames(m),t_df$gene),]

t_df

# Distribution of log2 fold changes among the selected genes.
ggplot(t_df,aes(x=log2FoldChange))+
  geom_histogram()
# Genes to annotate.
anno_gene<-c("APOBEC3B","CXCL6","CCL20","TNFAIP3","CXCL1","CXCL3","PTGS2","CXCL2","CXCL5","IL17RB","MUC5AC","NFKBIA","LCN2","CCL2","FOSB")

# Positions within `anno_gene` of the rows of `m` that are annotation genes.
match(rownames(m),anno_gene)[!is.na(match(rownames(m),anno_gene))]


# ComplexHeatmap of log2(m + 1) without row clustering.
# NOTE(review): `row_order` uses the same match() direction as above -- confirm.
Heatmap(log2(m+1),cluster_rows=FALSE,row_order=match(rownames(m),t_df$gene),right_annotation=ha)
# Base heatmap of each row divided by its own maximum; columns whose name
# contains "ES" get a red side colour, all others blue.
heatmap(m/apply(m,1,max),zlim=c(0,1),col=gray.colors(100),Rowv=NA,Colv=NA,labRow=NA,scale="none",ColSideColors=ifelse(grepl("ES",colnames(m)),"red","blue"))

# Volcano input: drop genes without an adjusted p-value and blank out every
# gene label except APOBEC3A and CXCL2.
test <- df %>%
  filter(!is.na(padj)) %>%
  mutate(gene = ifelse(gene %in% c("APOBEC3A", "CXCL2"), gene, NA))

# Volcano plot coloured by `threshold`. Labels are drawn only for genes that
# are rows of `m` and pass both cut-offs (`padj_thres`, `FC_thres` and `col`
# are set outside this block).
ggplot(test, aes(x = log2FoldChange, y = -log10(padj), col = threshold)) +
  geom_point() +
  ggtitle(paste("A3B_1st_C5", cond, fac1, "vs", fac2, sep = "_")) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  ylim(c(0, max(-log10(test$padj), na.rm = TRUE))) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = rel(1.5), hjust = 0.5),
    axis.title = element_text(size = rel(1.25))
  ) +
  geom_hline(yintercept = padj_thres, colour = "black", size = 1) +
  geom_vline(xintercept = FC_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_vline(xintercept = -FC_thres, colour = "black", size = 1) +
  geom_label_repel(
    data = test %>%
      filter(gene %in% rownames(m)) %>%
      filter(-log10(padj) > padj_thres & abs(log2FoldChange) > FC_thres),
    aes(x = log2FoldChange, y = -log10(padj), label = gene),
    max.overlaps = 100,
    size = 4,
    box.padding = 0.5,
    fill = "white"
  ) +
  scale_color_manual(values = col) +
  theme_test() +
  theme(
    axis.title = element_text(size = 40),
    axis.text = element_text(size = 35),
    legend.position = "none"
  ) +
  ylab("-log10(padj)")
# Take the first 20 rows of `oed` as the gene panel and keep labels only for
# those genes.
m <- oed[1:20, ]
test2 <- test %>%
  mutate(gene = ifelse(gene %in% rownames(m), gene, NA))
test2 %>% filter(!is.na(gene))

# Volcano plot with dashed cut-off guides; panel genes passing both cut-offs
# are labelled (`padj_thres`, `FC_thres` and `col` are set outside this block).
ggplot(test2, aes(x = log2FoldChange, y = -log10(padj), col = threshold)) +
  geom_point() +
  ggtitle(paste("A3A_1st_C3", cond, fac1, "vs", fac2, sep = "_")) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  ylim(c(0, max(-log10(test$padj), na.rm = TRUE))) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = rel(1.5), hjust = 0.5),
    axis.title = element_text(size = rel(1.25))
  ) +
  geom_hline(yintercept = padj_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_vline(xintercept = FC_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_vline(xintercept = -FC_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_label_repel(
    fill = "white",
    data = test2 %>%
      filter(gene %in% rownames(m)) %>%
      filter(-log10(padj) > padj_thres & abs(log2FoldChange) > FC_thres),
    aes(x = log2FoldChange, y = -log10(padj), label = gene),
    max.overlaps = 100,
    size = 8,
    box.padding = 0.5
  ) +
  scale_color_manual(values = col) +
  theme_test() +
  theme(
    axis.title = element_text(size = 40),
    axis.text = element_text(size = 35),
    legend.position = "none"
  ) +
  ylab("-log10(padj)") +
  xlab("log2FoldChange")
# Same volcano layout, keeping a label for APOBEC3A only.
test3 <- test %>%
  mutate(gene = ifelse(gene %in% c("APOBEC3A"), gene, NA))

# Every row passing both cut-offs is handed to the label layer
# (`padj_thres`, `FC_thres` and `col` are set outside this block).
ggplot(test3, aes(x = log2FoldChange, y = -log10(padj), col = threshold)) +
  geom_point() +
  ggtitle(paste("A3A_1st_C3", cond, fac1, "vs", fac2, sep = "_")) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  ylim(c(0, max(-log10(test$padj), na.rm = TRUE))) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = rel(1.5), hjust = 0.5),
    axis.title = element_text(size = rel(1.25))
  ) +
  geom_hline(yintercept = padj_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_vline(xintercept = FC_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_vline(xintercept = -FC_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_label_repel(
    fill = "white",
    data = test3 %>%
      filter(-log10(padj) > padj_thres & abs(log2FoldChange) > FC_thres),
    aes(x = log2FoldChange, y = -log10(padj), label = gene),
    max.overlaps = 100,
    size = 8,
    box.padding = 0.5
  ) +
  scale_color_manual(values = col) +
  theme_test() +
  theme(
    axis.title = element_text(size = 40),
    axis.text = element_text(size = 35),
    legend.position = "none"
  ) +
  ylab("-log10(padj)") +
  xlab("log2FoldChange")
# Same volcano layout, keeping labels for the first ten rows of `m` only.
# Fix: `fill = "white"` was passed twice to geom_label_repel(); the duplicate
# argument is removed. `padj_thres`, `FC_thres` and `col` are set outside this
# block.
m[1:10, ]
test4 <- test %>%
  mutate(gene = ifelse(gene %in% rownames(m[1:10, ]), gene, NA))
test4
ggplot(test4, aes(x = log2FoldChange, y = -log10(padj), col = threshold)) +
  geom_point() +
  ggtitle(paste("A3A_1st_C3", cond, fac1, "vs", fac2, sep = "_")) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  ylim(c(0, max(-log10(test$padj), na.rm = TRUE))) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = rel(1.5), hjust = 0.5),
    axis.title = element_text(size = rel(1.25))
  ) +
  geom_hline(yintercept = padj_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_vline(xintercept = FC_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_vline(xintercept = -FC_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_label_repel(
    data = test4 %>%
      filter(gene %in% rownames(m)) %>%
      filter(-log10(padj) > padj_thres & abs(log2FoldChange) > FC_thres),
    aes(x = log2FoldChange, y = -log10(padj), label = gene),
    max.overlaps = 100,
    size = 8,
    box.padding = 0.5,
    fill = "white"
  ) +
  scale_color_manual(values = col) +
  theme_test() +
  theme(
    axis.title = element_text(size = 40),
    axis.text = element_text(size = 35),
    legend.position = "none"
  ) +
  ylab("-log10(padj)") +
  xlab("log2FoldChange")

# Hand-picked genes to label on the volcano plot.
target_gene <- c(
  "APOBEC3A", "ATF3", "CA9", "ABCA12", "REG1A", "NDRG1", "CXCL2",
  "IL8", "CCL20", "CDKN1A", "UNG", "REV1", "DDB2", "XPC"
)

target_gene
test5 <- test %>%
  mutate(gene = ifelse(gene %in% target_gene, gene, NA))

# Volcano plot; the label layer receives the whole table, so only rows whose
# `gene` was kept above carry a label value (`padj_thres`, `FC_thres` and `col`
# are set outside this block).
ggplot(test5, aes(x = log2FoldChange, y = -log10(padj), col = threshold)) +
  geom_point() +
  ggtitle(paste("A3A_1st_C3", cond, fac1, "vs", fac2, sep = "_")) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  ylim(c(0, max(-log10(test$padj), na.rm = TRUE))) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = rel(1.5), hjust = 0.5),
    axis.title = element_text(size = rel(1.25))
  ) +
  geom_hline(yintercept = padj_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_vline(xintercept = FC_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_vline(xintercept = -FC_thres, colour = "black", size = 1, linetype = "dashed") +
  geom_label_repel(
    data = test5,
    aes(x = log2FoldChange, y = -log10(padj), label = gene),
    max.overlaps = 100,
    size = 8,
    box.padding = 0.5,
    segment.colour = "black"
  ) +
  scale_color_manual(values = col) +
  theme_test() +
  theme(
    axis.title = element_text(size = 40),
    axis.text = element_text(size = 35),
    legend.position = "none"
  ) +
  ylab("-log10(padj)") +
  xlab("log2FoldChange")





#####################
### A3A 0h vs 48h  ######
#####################

# Re-discover the RSEM gene-level result files and name every path by its
# sample id (file name without the ".genes.results" suffix).
files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A",
  pattern = "genes.results$",
  full.names = TRUE
)
# Alternative input kept for reference:
#files_to_read <- list.files(path= "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/02_rsem/rsem/",pattern="genes.results$",full.names=T)
#files_to_read<-files_to_read[1:8]

names(files_to_read) <- gsub(".genes.results", "", basename(files_to_read))
names(files_to_read)

# Reload the sample sheet as a plain data.frame with the `id` column as row
# names. Note that this reads metadata.txt.
metadata <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/metadata.txt") %>%
  as.data.frame()
rownames(metadata) <- metadata$id
metadata
#metadata<-metadata[,2:4]




# Volcano plots for the A3A 100ng samples: every later time point is compared
# with 0h. Each iteration runs one DESeq2 fit and saves one PDF to the working
# directory. `FC_thres` is set outside this block.
#
# Changes from the previous version:
#   * the sample sheet is put in the same order as the imported files, because
#     DESeq2 pairs colData rows with count columns by position;
#   * `txi <- txi.rsem` only runs when `txi.rsem` exists, so a missing object
#     no longer aborts the loop;
#   * the result table is built with tibble() instead of cbind(), which
#     converted the numbers to text and back (`pval` is now numeric);
#   * the colour vector is named, so colours stay attached to their category
#     even when one category is absent;
#   * the plot is passed to ggsave() explicitly.
for (i in c("12h", "24h", "36h", "48h")) {
  print(i)
  cond <- "100ng"
  sample <- "A3A"
  fac1 <- "0h"
  fac2 <- i
  vol_meta <- metadata[metadata$dose == cond, ]
  vol_meta <- vol_meta[vol_meta$type == sample, ]
  vol_meta <- vol_meta[vol_meta$time == fac2 | vol_meta$time == fac1, ]
  vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
  vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]
  vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)
  if (exists("txi.rsem")) {
    txi <- txi.rsem
  }
  # Replace zero lengths by 1 before building the DESeq2 data set.
  vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

  # Strip the "ENSG..._" prefix from the row names.
  rownames(vol_txi.rsem$abundance) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$abundance))
  rownames(vol_txi.rsem$counts) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$counts))
  rownames(vol_txi.rsem$length) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$length))

  vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                      colData = vol_meta,
                                      design = ~time)
  vol_dds <- DESeq(vol_dds)
  vol_res <- results(vol_dds, contrast = c("time", fac2, fac1))

  df <- tibble(
    log2FoldChange = vol_res$log2FoldChange,
    padj = vol_res$padj,
    gene = rownames(vol_res),
    pval = vol_res$pvalue
  )
  # Category used for colouring; the string "NA" marks genes outside both tails.
  df <- df %>%
    mutate(threshold = ifelse(log2FoldChange > FC_thres & -log10(padj) > 1.3,
                              "increase",
                              ifelse(log2FoldChange < (-FC_thres) & -log10(padj) > 1.3,
                                     "decrease", "NA")))
  col <- c(decrease = "blue", increase = "red", "NA" = "#808080")
  df <- df %>% arrange(padj)
  # Cap -log10(padj) at 300.
  df$padj[which(-log10(df$padj) > 300)] <- (10^-300)

  vol_plot <- ggplot(df %>% filter(!is.na(padj))) +
    geom_point(aes(x = log2FoldChange, y = -log10(padj), col = threshold)) +
    ggtitle(paste("A3A_1st_C3", cond, fac1, "vs", fac2, sep = "_")) +
    xlab("log2 fold change") +
    ylab("-log10 adjusted p-value") +
    ylim(c(0, max(-log10(df$padj), na.rm = TRUE))) +
    theme(legend.position = "none",
          plot.title = element_text(size = rel(1.5), hjust = 0.5),
          axis.title = element_text(size = rel(1.25))) +
    geom_hline(yintercept = 1.3, colour = "#990000", linetype = "dashed") +
    geom_vline(xintercept = FC_thres, colour = "#990000", linetype = "dashed") +
    geom_vline(xintercept = -FC_thres, colour = "#990000", linetype = "dashed") +
    # Label the 70 most significant genes that fall in either tail.
    geom_text_repel(data = df %>% filter(threshold != "NA") %>% head(70),
                    aes(x = log2FoldChange, y = -log10(padj), label = gene),
                    max.overlaps = 100, size = 3, box.padding = 0.5) +
    scale_color_manual(values = col)

  ggsave(paste0(paste("A3A_1st_C3", cond, fac1, "vs", fac2, sep = "_"), ".pdf"),
         plot = vol_plot)
}





##volcano by ggplot##
#####################
### TP53WT vs TP53KO  ######
#####################

# Re-discover the RSEM gene-level result files and name every path by its
# sample id (file name without the ".genes.results" suffix).
files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A",
  pattern = "genes.results$",
  full.names = TRUE
)
# Alternative input kept for reference:
#files_to_read <- list.files(path= "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/02_rsem/rsem/",pattern="genes.results$",full.names=T)
#files_to_read<-files_to_read[1:8]

names(files_to_read) <- gsub(".genes.results", "", basename(files_to_read))
names(files_to_read)

# Reload the sample sheet as a plain data.frame with the `id` column as row
# names, print it, then keep only columns 2 to 4.
metadata <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/metadata.txt") %>%
  as.data.frame()
rownames(metadata) <- metadata$id
metadata
metadata <- metadata[, 2:4]




# Volcano plots comparing A3B_TP53KO against A3B at 48h, once per dose. Each
# iteration runs one DESeq2 fit and saves one PDF to the working directory.
#
# Changes from the previous version:
#   * the sample sheet is put in the same order as the imported files, because
#     DESeq2 pairs colData rows with count columns by position;
#   * `txi <- txi.rsem` only runs when `txi.rsem` exists;
#   * the stray `fac2` / `fac1` evaluations are removed (neither is set here);
#   * the result table is built with tibble() instead of cbind(), which
#     converted the numbers to text and back;
#   * the horizontal guide is drawn at 1.3, the cut-off actually used for the
#     colour categories (it was drawn at 1.33);
#   * the colour vector is named, so colours stay attached to their category
#     even when one category is absent;
#   * the plot is passed to ggsave() explicitly.
for (i in c("100ng", "3ug")) {
  print(i)
  cond <- i
  #  cond="100ng"
  time <- "48h"
  sample1 <- "A3B"
  sample2 <- "A3B_TP53KO"
  vol_meta <- metadata[metadata$dose == cond, ]
  vol_meta <- vol_meta[vol_meta$type == sample2 | vol_meta$type == sample1, ]
  vol_meta <- vol_meta[vol_meta$time == time, ]
  vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
  vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]
  vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)
  if (exists("txi.rsem")) {
    txi <- txi.rsem
  }
  # Replace zero lengths by 1 before building the DESeq2 data set.
  vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

  # Strip the "ENSG..._" prefix from the row names.
  rownames(vol_txi.rsem$abundance) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$abundance))
  rownames(vol_txi.rsem$counts) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$counts))
  rownames(vol_txi.rsem$length) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$length))

  vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                      colData = vol_meta,
                                      design = ~type)
  vol_dds <- DESeq(vol_dds)
  vol_res <- results(vol_dds, contrast = c("type", sample2, sample1))

  df <- tibble(
    log2FoldChange = vol_res$log2FoldChange,
    padj = vol_res$padj,
    gene = rownames(vol_res)
  )
  # Category used for colouring; the string "NA" marks genes outside both tails.
  df <- df %>%
    mutate(threshold = ifelse(log2FoldChange > 1.5 & -log10(padj) > 1.3,
                              "increase",
                              ifelse(log2FoldChange < (-1.5) & -log10(padj) > 1.3,
                                     "decrease", "NA")))
  col <- c(decrease = "blue", increase = "red", "NA" = "#808080")
  df <- df %>% arrange(padj)
  # Cap -log10(padj) at 300.
  df$padj[which(-log10(df$padj) > 300)] <- (10^-300)

  vol_plot <- ggplot(df %>% filter(!is.na(padj))) +
    geom_point(aes(x = log2FoldChange, y = -log10(padj), col = threshold)) +
    ggtitle(paste("A3B_1st_C5", time, cond, "TP53", "vs", "TP53KO", sep = "_")) +
    xlab("log2 fold change") +
    ylab("-log10 adjusted p-value") +
    theme(legend.position = "none",
          plot.title = element_text(size = rel(1.5), hjust = 0.5),
          axis.title = element_text(size = rel(1.25))) +
    geom_hline(yintercept = 1.3, colour = "#990000", linetype = "dashed") +
    geom_vline(xintercept = 1.5, colour = "#990000", linetype = "dashed") +
    geom_vline(xintercept = -1.5, colour = "#990000", linetype = "dashed") +
    # Label the 70 most significant genes that fall in either tail.
    geom_text_repel(data = df %>% filter(threshold != "NA") %>% head(70),
                    aes(x = log2FoldChange, y = -log10(padj), label = gene),
                    max.overlaps = 100, size = 2.5, box.padding = 0.5) +
    scale_color_manual(values = col)

  ggsave(paste0(paste("A3B_1st_C5", time, cond, "TP53", "vs", "TP53KO", sep = "_"), ".pdf"),
         plot = vol_plot)
}



##volcano by ggplot##
#####################
### A3A 0h vs A3A  48h ######
#####################

# Re-discover the RSEM gene-level result files and name every path by its
# sample id (file name without the ".genes.results" suffix).
files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A",
  pattern = "genes.results$",
  full.names = TRUE
)
# Alternative input kept for reference:
#files_to_read <- list.files(path= "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/02_rsem/rsem/",pattern="genes.results$",full.names=T)
#files_to_read<-files_to_read[1:8]

names(files_to_read) <- gsub(".genes.results", "", basename(files_to_read))
names(files_to_read)

# Reload the sample sheet as a plain data.frame with the `id` column as row
# names, print it, then keep only columns 2 to 4.
metadata <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/metadata.txt") %>%
  as.data.frame()
rownames(metadata) <- metadata$id
metadata
metadata <- metadata[, 2:4]

library(ggforce)



# Volcano plot for all A3A samples: 48h against 0h (design ~time). Samples
# whose id contains "48hr" are excluded. The PDF is saved to the working
# directory.
#
# Changes from the previous version:
#   * the sample sheet is put in the same order as the imported files, because
#     DESeq2 pairs colData rows with count columns by position;
#   * `txi <- txi.rsem` only runs when `txi.rsem` exists;
#   * the stray `txi`, `fac2` and `fac1` console dumps are removed (`fac1` and
#     `fac2` are not set in this section);
#   * the result table is built with tibble() instead of cbind(), which
#     converted the numbers to text and back;
#   * the horizontal guide is drawn at 1.3, the cut-off actually used for the
#     colour categories (it was drawn at 1.33);
#   * the colour vector is named, so colours stay attached to their category
#     even when one category is absent;
#   * the plot is passed to ggsave() explicitly.
# NOTE(review): `cond` is set but the dose filter is commented out, while the
# title hard-codes "3ug" -- confirm that all doses are meant to be pooled.
cond <- "3ug"
#  cond="100ng"
time1 <- "48h"
time2 <- "0h"
sample1 <- "Ctrl"
sample2 <- "A3A"
vol_meta <- metadata[metadata$type == "A3A", ]
#vol_meta<-metadata[metadata$dose==cond ,]

vol_meta <- vol_meta[vol_meta$time == time1 | vol_meta$time == time2, ]
vol_meta <- vol_meta[!grepl("48hr", rownames(vol_meta)), ]
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]
vol_meta
vol_files_to_read
vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)
if (exists("txi.rsem")) {
  txi <- txi.rsem
}
# Replace zero lengths by 1 before building the DESeq2 data set.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

# Strip the "ENSG..._" prefix from the row names.
rownames(vol_txi.rsem$abundance) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$abundance))
rownames(vol_txi.rsem$counts) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$counts))
rownames(vol_txi.rsem$length) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$length))

vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                    colData = vol_meta,
                                    design = ~time)
vol_dds <- DESeq(vol_dds)
vol_res <- results(vol_dds, contrast = c("time", time1, time2))

df <- tibble(
  log2FoldChange = vol_res$log2FoldChange,
  padj = vol_res$padj,
  gene = rownames(vol_res)
)
# Category used for colouring; the string "NA" marks genes outside both tails.
df <- df %>%
  mutate(threshold = ifelse(log2FoldChange > 1.5 & -log10(padj) > 1.3,
                            "increase",
                            ifelse(log2FoldChange < (-1.5) & -log10(padj) > 1.3,
                                   "decrease", "NA")))
col <- c(decrease = "blue", increase = "red", "NA" = "#808080")
df <- df %>% arrange(padj)
# Cap -log10(padj) at 300.
df$padj[which(-log10(df$padj) > 300)] <- (10^-300)

vol_plot <- ggplot(df %>% filter(!is.na(padj))) +
  geom_point(aes(x = log2FoldChange, y = -log10(padj), col = threshold)) +
  ggtitle(paste("A3A_1st_C3", "48h", "3ug", "0h", "vs", "48h", sep = "_")) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25))) +
  geom_hline(yintercept = 1.3, colour = "#990000", linetype = "dashed") +
  geom_vline(xintercept = 1.5, colour = "#990000", linetype = "dashed") +
  geom_vline(xintercept = -1.5, colour = "#990000", linetype = "dashed") +
  # Label the 70 rows with the smallest adjusted p-values.
  geom_text_repel(data = head(df, 70),
                  aes(x = log2FoldChange, y = -log10(padj), label = gene),
                  max.overlaps = 100, size = 2.5, box.padding = 0.5) +
  scale_color_manual(values = col)
vol_plot

ggsave(paste0(paste("A3A_1st_C3", "48h", "3ug", "0h", "vs", "48h", sep = "_"), ".pdf"),
       plot = vol_plot)


##volcano by ggplot##
#####################
### A3B vs A3A  ######
#####################

# Re-discover the RSEM gene-level result files and name every path by its
# sample id (file name without the ".genes.results" suffix).
files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A",
  pattern = "genes.results$",
  full.names = TRUE
)
# Alternative input kept for reference:
#files_to_read <- list.files(path= "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/02_rsem/rsem/",pattern="genes.results$",full.names=T)
#files_to_read<-files_to_read[1:8]

names(files_to_read) <- gsub(".genes.results", "", basename(files_to_read))
names(files_to_read)

# Reload the sample sheet as a plain data.frame with the `id` column as row
# names, print it, then keep only columns 2 to 4.
metadata <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A/metadata.txt") %>%
  as.data.frame()
rownames(metadata) <- metadata$id
metadata
metadata <- metadata[, 2:4]

library(ggforce)



# Volcano plot comparing A3A against A3B at 48h / 3ug (design ~type). The PDF
# is saved to the working directory.
#
# Changes from the previous version:
#   * `colours = "black"` sat inside aes(), where it is an unknown aesthetic
#     and is ignored with a warning; it is now the constant `colour` argument
#     of the label layer;
#   * the sample sheet is put in the same order as the imported files, because
#     DESeq2 pairs colData rows with count columns by position;
#   * `txi <- txi.rsem` only runs when `txi.rsem` exists;
#   * the stray `txi`, `fac2` and `fac1` console dumps are removed (`fac1` and
#     `fac2` are not set in this section);
#   * the result table is built with tibble() instead of cbind(), which
#     converted the numbers to text and back;
#   * the horizontal guide is drawn at 1.3, the cut-off actually used for the
#     colour categories (it was drawn at 1.33);
#   * the colour vector is named, so colours stay attached to their category
#     even when one category is absent;
#   * the plot is passed to ggsave() explicitly.
cond <- "3ug"
#  cond="100ng"
time <- "48h"
sample1 <- "A3B"
sample2 <- "A3A"
vol_meta <- metadata[metadata$dose == cond, ]
vol_meta <- vol_meta[vol_meta$type == sample2 | vol_meta$type == sample1, ]
vol_meta <- vol_meta[vol_meta$time == time, ]
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]
vol_meta
vol_files_to_read
vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)
if (exists("txi.rsem")) {
  txi <- txi.rsem
}
# Replace zero lengths by 1 before building the DESeq2 data set.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

# Strip the "ENSG..._" prefix from the row names.
rownames(vol_txi.rsem$abundance) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$abundance))
rownames(vol_txi.rsem$counts) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$counts))
rownames(vol_txi.rsem$length) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$length))

vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                    colData = vol_meta,
                                    design = ~type)
vol_dds <- DESeq(vol_dds)
vol_res <- results(vol_dds, contrast = c("type", sample2, sample1))

df <- tibble(
  log2FoldChange = vol_res$log2FoldChange,
  padj = vol_res$padj,
  gene = rownames(vol_res)
)
# Category used for colouring; the string "NA" marks genes outside both tails.
df <- df %>%
  mutate(threshold = ifelse(log2FoldChange > 1.5 & -log10(padj) > 1.3,
                            "increase",
                            ifelse(log2FoldChange < (-1.5) & -log10(padj) > 1.3,
                                   "decrease", "NA")))
col <- c(decrease = "blue", increase = "red", "NA" = "#808080")
df <- df %>% arrange(padj)
# Cap -log10(padj) at 300.
df$padj[which(-log10(df$padj) > 300)] <- (10^-300)

vol_plot <- ggplot(df %>% filter(!is.na(padj))) +
  geom_point(aes(x = log2FoldChange, y = -log10(padj), col = threshold)) +
  # Label the 70 most significant genes that fall in either tail.
  geom_text_repel(data = df %>% filter(threshold != "NA") %>% head(70),
                  aes(x = log2FoldChange, y = -log10(padj), label = gene),
                  colour = "black",
                  max.overlaps = 100, size = 2.5, box.padding = 0.5) +
  ggtitle(paste("A3B_1st_C5", time, cond, "A3A", "vs", "A3B", sep = "_")) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25))) +
  geom_hline(yintercept = 1.3, colour = "#990000", linetype = "dashed") +
  geom_vline(xintercept = 1.5, colour = "#990000", linetype = "dashed") +
  geom_vline(xintercept = -1.5, colour = "#990000", linetype = "dashed") +
  scale_color_manual(values = col)
vol_plot

#  facet_zoom(ylim = c(0, 100),xlim=c(-10,10))

ggsave(paste0(paste("A3B_1st_C5", time, cond, "A3A", "vs", "A3B", sep = "_"), ".pdf"),
       plot = vol_plot)






# Bare scatter version of the volcano plot (points only, coloured by
# `threshold`), stored in `p`. Guide lines and gene labels are switched off:
#   geom_hline(yintercept = 1.33, colour="#990000", linetype="dashed")
#   geom_vline(xintercept = 1, ...) / geom_vline(xintercept = -1, ...)
#   geom_text_repel(data=head(df,60),aes(label=gene),max.overlaps = 100)
p <- ggplot(
  df,
  aes(x = log2FoldChange, y = -log10(padj), label = gene, col = threshold)
) +
  geom_point() +
  scale_color_manual(values = col)

##volcano by ggplot##
#####################
### 0h vs 12h  ######
#####################

# Re-discover the RSEM gene-level result files and name every path by its
# sample id (file name without the ".genes.results" suffix).
files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/08_A3B_A3A",
  pattern = "genes.results$",
  full.names = TRUE
)
# Alternative input kept for reference:
#files_to_read <- list.files(path= "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/02_rsem/rsem/",pattern="genes.results$",full.names=T)
#files_to_read<-files_to_read[1:8]

names(files_to_read) <- gsub(".genes.results", "", basename(files_to_read))

# Comparison settings: 12h against 0h for sample "C5" at 100ng.
# NOTE(review): this section filters on `vol_meta$sample` and models
# `~condition`; confirm that the sample sheet in memory has both columns.
cond <- "100ng"
sample <- "C5"
fac1 <- "0h"
fac2 <- "12h"
vol_meta <- metadata[metadata$dose == cond, ]
vol_meta <- vol_meta[vol_meta$sample == sample, ]
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)
txi <- txi.rsem
# Replace zero lengths by 1 before building the DESeq2 data set.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

# Strip the "ENSG..._" prefix from the row names.
rownames(vol_txi.rsem$abundance) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$abundance))
rownames(vol_txi.rsem$counts) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$counts))
rownames(vol_txi.rsem$length) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$length))

vol_dds <- DESeqDataSetFromTximport(
  vol_txi.rsem,
  colData = vol_meta,
  design = ~condition
)
vol_dds <- DESeq(vol_dds)

vol_res <- results(vol_dds, contrast = c("condition", fac2, fac1))

# Result table; the numeric columns pass through a character matrix and are
# converted back to double afterwards.
df <- cbind(vol_res$log2FoldChange, vol_res$padj, rownames(vol_res)) %>%
  as.tibble()
colnames(df) <- c("log2FoldChange", "padj", "gene")
df$log2FoldChange <- as.double(df$log2FoldChange)
df$padj <- as.double(df$padj)
is.double(df$log2FoldChange)
# Category used for colouring; the string "NA" marks genes outside both tails.
df <- df %>%
  mutate(
    threshold = ifelse(
      log2FoldChange > 1 & -log10(padj) > 1.3,
      "increase",
      ifelse(log2FoldChange < (-1) & -log10(padj) > 1.3, "decrease", "NA")
    )
  )
col <- c("blue", "red", "#808080")
df <- df %>% arrange(padj)
# Cap -log10(padj) at 300.
df$padj[-log10(df$padj) > 300] <- (10^-300)

ggplot(df,aes(x=log2FoldChange, y=-log10(padj),label=gene,col=threshold)) +
  geom_point() +
  ggtitle(paste("A3B_1st",sample,cond,fac1,"vs",fac2,sep="_")) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  #scale_y_continuous(limits = c(0,50)) +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25)))  +
  geom_hline(yintercept = 1.33, colour="#990000", linetype="dashed") +
  geom_vline(xintercept = 1, colour="#990000", linetype="dashed") + geom_vline(xintercept = -1, colour="#990000", linetype="dashed")+
  geom_text_repel(data=head(df,70),aes(label=gene),max.overlaps = 100,size=3)+
  scale_color_manual(values=col)

ggsave(paste0(paste("A3B_1st",sample,cond,fac1,"vs",fac2,sep="_"),".pdf"))


# Points-only volcano, rendered once statically and once through plotly so
# individual genes can be inspected interactively.
p <- ggplot(
  data = df,
  mapping = aes(
    x = log2FoldChange,
    y = -log10(padj),
    label = gene,
    colour = threshold
  )
) +
  geom_point() +
  scale_colour_manual(values = col)

p
# crosstalk and plotly are loaded from an explicit library directory.
library(crosstalk, lib.loc = "/home/users/kjyi/R/x86_64-redhat-linux-gnu-library/3.6/")
library(plotly, lib.loc = "/home/users/kjyi/R/x86_64-redhat-linux-gnu-library/3.6/")
ggplotly(p)
##complex heatmap###
####################
# Variance-stabilised expression of a panel of cell-cycle / apoptosis genes,
# shown as a heatmap and as per-gene boxplots.
vsd <- vst(dds)

#rownames(assay(vsd))[grepl("TP53$",rownames(assay(vsd)))]

#gene_list<-c("TP53$","CDKN1A$","CDKN1B$","CCND1$","CDK4$","CCNE1$","CDK2$","CCNA1$","MKI67$","CDKN2B$","CDKN2A$")
# Regular expressions anchored at the end of the row name.
gene_list <- c("CDKN2B$", "CDKN2C$", "CDKN2A$", "CDKN2D$", "CCND1$", "CDK6$", "CDK4$", "CCNE2$", "CDK2$", "CDKN1A$", "CDKN1B$", "CDKN1C$", "CCNA1$", "CCNA2$", "CDK1$", "CCNB1$", "BCL2$", "CASP3$", "MKI67$")

x <- rownames(assay(vsd))[grepl(paste(gene_list, collapse = "|"), rownames(assay(vsd)))]
x
# drop = FALSE keeps a matrix even when only one gene matches.
test <- as.matrix(assay(vsd)[x, , drop = FALSE])
test
# Hand-picked column order for the heatmap. It assumes 27 sample columns.
col_order <- colnames(test)[c(1:12, 16:18, 13:15, 19:21, 25:27, 22:24)]
ComplexHeatmap::Heatmap(test, name = "mat",
                        column_order = col_order)

heat_df <- test %>% as_tibble()
heat_df <- heat_df %>% mutate(gene_id = rownames(test))
# Reshape every sample column to long form, whatever the number of samples.
heat_df <- heat_df %>% gather(id, norm.count, -gene_id)
heat_df <- heat_df %>% mutate(group = ifelse(grepl("Ctrl", id), "Ctrl", "C5"))
# Same labelling rules as the TPM section below. Without the 12h/36h cases
# those samples would be counted as "48h".
heat_df <- heat_df %>% mutate(condition = case_when(
  grepl("0h", id) ~ "0h",
  grepl("12h", id) ~ "12h",
  grepl("24h", id) ~ "24h",
  grepl("36h", id) ~ "36h",
  grepl("3ug", id) ~ "48h_3ug",
  TRUE ~ "48h"
))
heat_df
ggplot(heat_df, aes(x = condition, y = norm.count, fill = group, col = group)) +
  geom_boxplot() +
  facet_wrap(~gene_id, scales = "free") +
  # The "+" above was missing, so this theme was never applied to the plot.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

#### plot with TPM####
####################

# Per-gene TPM boxplots for the genes matched by `gene_list`, read straight
# from the RSEM gene tables.
all_files <- lapply(files_to_read, function(x) {
  read_tsv(file = x)
})

# One TPM column per sample, named after the file.
tpm_cols <- lapply(all_files, function(x) x[["TPM"]])
names(tpm_cols) <- gsub(".genes.results", "", basename(files_to_read))
exp_df <- as_tibble(tpm_cols)

# Take gene ids from the tables that were just read, so the ids always line up
# with the TPM rows. This assumes every file lists genes in the same order,
# which the column-binding above already relies on.
exp_df <- exp_df %>% mutate(gene_id = all_files[[1]]$gene_id)

exp_df

# gene_list holds regular expressions, so genes are matched with grepl()
# rather than %in%.
y <- exp_df$gene_id[grepl(paste(gene_list, collapse = "|"), exp_df$gene_id)]
y
exp_df <- exp_df %>% filter(gene_id %in% y)
exp_df
# Reshape every sample column to long form, whatever the number of samples.
exp_df <- exp_df %>% gather(id, TPM, -gene_id)
exp_df <- exp_df %>% mutate(group = ifelse(grepl("Ctrl", id), "Ctrl", "C5"))
# The first matching pattern wins. Ids matching none are treated as "48h".
exp_df <- exp_df %>% mutate(condition = case_when(
  grepl("0h", id) ~ "0h",
  grepl("12h", id) ~ "12h",
  grepl("24h", id) ~ "24h",
  grepl("36h", id) ~ "36h",
  grepl("3ug", id) ~ "48h_3ug",
  TRUE ~ "48h"
))
#exp_df$gene_id<-factor(exp_df$gene_id,levels=y)
exp_df
ggplot(exp_df, aes(x = condition, y = TPM, fill = group, col = group)) +
  geom_boxplot() +
  facet_wrap(~gene_id, scales = "free") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))




###vol###
#########
# Control samples at 100ng: DESeq2 fit, shrunken 0h-vs-48h fold changes, and
# two EnhancedVolcano renderings.
#dds<-DESeq(dds,minReplicatesForReplace=Inf)
#res<-results(dds,cooksCutoff-FALSE,independentfiltering=FALSE)
vol_meta <- metadata[metadata$dose == "100ng", ]
vol_meta <- vol_meta[vol_meta$sample == "Ctrl", ]
vol_meta
vol_files_to_read <- files_to_read[names(files_to_read) %in% rownames(vol_meta)]
vol_files_to_read
# Put the sample table in the order of the files given to tximport, so colData
# rows and count columns describe the same samples.
vol_meta <- vol_meta[names(vol_files_to_read), , drop = FALSE]
vol_txi.rsem <- tximport(vol_files_to_read, type = "rsem", txIn = FALSE, txOut = FALSE)
#vol_txi.rsem$counts
txi <- txi.rsem
# Zero effective lengths are replaced by 1 before building the DESeq data set.
vol_txi.rsem$length[vol_txi.rsem$length == 0] <- 1

# Drop the "ENSG..._" prefix so rows are named by gene symbol only.
rownames(vol_txi.rsem$abundance) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$abundance))
rownames(vol_txi.rsem$counts) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$counts))
rownames(vol_txi.rsem$length) <- gsub("ENSG[0-9]*_", "", rownames(vol_txi.rsem$length))
#vol_files_to_read %>%length()
vol_dds <- DESeqDataSetFromTximport(vol_txi.rsem,
                                    colData = vol_meta,
                                    design = ~condition)
vol_dds

vol_dds <- DESeq(vol_dds)
#vol_res<-results(vol_dds)

# NOTE(review): this contrast puts "0h" in the numerator and "48h" in the
# denominator. Confirm that is the intended direction of the fold change.
vol_res <- results(vol_dds, contrast = c("condition", "0h", "48h"))

vol_res <- lfcShrink(vol_dds, contrast = c("condition", "0h", "48h"), res = vol_res, type = "normal")
# EnhancedVolcano is called through its namespace because the library() line
# for it is commented out at the top of this script.
EnhancedVolcano::EnhancedVolcano(vol_res, lab = rownames(vol_res),
                x = "log2FoldChange",
                y = "pvalue",
                pointSize = 1,
                labSize = 2.0,
                colAlpha = 1,
                legendPosition = "right",
                legendLabSize = 12,
                title = "0h~48h",
                drawConnectors = TRUE,
                widthConnectors = 0.1
)

EnhancedVolcano::EnhancedVolcano(vol_res,
                lab = rownames(vol_res),
                x = "log2FoldChange",
                y = "pvalue",
                xlab = bquote(~Log[2]~ "fold change"),
                pCutoff = 10e-32,
                FCcutoff = 2.0,
                pointSize = 4.0,
                labSize = 6.0,
                colAlpha = 1,
                legendPosition = "right",
                legendLabSize = 12,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.75)

#######highly_variable_genes#######
# Unshrunken results as a plain data frame (gene names in column `row`).
vol_res <- results(vol_dds, contrast = c("condition", "0h", "48h"), tidy = TRUE)

# Select the significant genes from a differential-expression result table.
#
# Args:
#   de.test.result: data frame with columns `padj` and `log2FoldChange` plus a
#     p-value column named `pvalue` (or legacy `pval`), e.g. the output of
#     results(..., tidy = TRUE).
#   alpha: adjusted p-value cut-off; rows with padj < alpha are kept.
#
# Returns:
#   Columns 1, 2 and 6 of the kept rows, ordered by increasing p-value. Rows
#   whose padj is NA are dropped, as are rows with an infinite or NaN
#   log2FoldChange.
#   NOTE(review): for a tidy DESeq2 table, column 6 is `pvalue`, not the
#   log2 fold change that the original comment mentioned. The positional
#   selection is kept for backward compatibility.
find.significant.genes <- function(de.test.result, alpha = 0.05) {
  # which() discards NA comparisons. Plain logical indexing would turn every
  # NA padj into a row made entirely of NA.
  keep <- which(
    de.test.result$padj < alpha &
      !is.infinite(de.test.result$log2FoldChange) &
      !is.nan(de.test.result$log2FoldChange)
  )
  filtered <- de.test.result[keep, , drop = FALSE]
  # Look the p-value column up by its full name, so this also works on
  # tibbles, which do not partial-match `$pval`.
  p_col <- intersect(c("pvalue", "pval"), names(filtered))[1]
  filtered[order(filtered[[p_col]]), c(1, 2, 6)]
}


# Significant genes of the Ctrl 0h-vs-48h comparison, best p-value first.
de.genes <- find.significant.genes(vol_res)
de.genes.names <- de.genes %>% dplyr::select(row)
# The row names of vol_dds already had their "ENSG..._" prefix removed, so only
# a leading Ensembl prefix is stripped here. An unanchored pattern would also
# eat the first part of symbols that contain "_" themselves.
de.genes.names$row <- gsub("^ENSG[0-9]*_", "", de.genes.names$row)
nrow(de.genes.names)

# head() returns at most 100 entries and never pads with NA when fewer genes
# are significant.
head(de.genes.names, n = 100) %>% View()
head(de.genes.names$row, 100) %>% view()

# NOTE(review): this base-graphics volcano reads `res`, not the `vol_res`
# table used above. Confirm which result set is meant here.
with(res, plot(log2FoldChange, -log10(pvalue), pch = 20, main = "Volcano plot", xlim = c(-3, 3)))
with(subset(res, padj < .01), points(log2FoldChange, -log10(pvalue), pch = 20, col = "blue"))
with(subset(res, padj < .01 & abs(log2FoldChange) > 2), points(log2FoldChange, -log10(pvalue), pch = 20, col = "red"))

# Read every RSEM gene table and assemble (a) a TPM table, (b) a rounded count
# table and (c) a DESeq2 data set covering all samples.
all_files <- lapply(files_to_read, function(x) {
  read_tsv(file = x)
})
sample_ids <- gsub(".genes.results", "", basename(files_to_read))

# TPM, one column per sample.
exp_df <- lapply(all_files, function(x) x %>% dplyr::select(TPM)) %>%
  do.call(cbind, .)
colnames(exp_df) <- sample_ids
exp_df

# Expected counts, one column per sample. The table is kept purely numeric so
# that round(), `+ 1` and rowSums() work on it. Gene ids live in the row names
# and in `id`.
tmp_df <- lapply(all_files, function(x) x %>% dplyr::select(expected_count)) %>%
  do.call(cbind, .) %>%
  as.data.frame()
colnames(tmp_df) <- sample_ids
gene_id <- all_files[[1]]$gene_id
id <- all_files[[1]] %>% dplyr::select(gene_id)
rownames(tmp_df) <- id$gene_id
#write.table(tmp_df[1:11],)
sum(is.na(tmp_df))
tmp_df <- round(tmp_df, 0)
# NOTE(review): a pseudo-count of 1 is added to every count before it is
# handed to DESeq2. Confirm this is intended.
tmp_df <- tmp_df + 1
tmp_df

# Tidy layout: gene_id first, then one column per sample.
add_df <- tmp_df %>% mutate(gene_id = id$gene_id)
tot_df <- add_df %>% dplyr::select(gene_id, everything())
tot_df <- tot_df %>% as.data.frame()
rownames(tot_df) <- tot_df$gene_id
#tot_df$gene_id<-gsub("[A-Z,0-9]*_","",tot_df$gene_id)
#metadata<-read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/PMA_RNAseq/02_star/rsem/Rdata/metadata.txt")
#metadata<-read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/integrate/rsem/metadata.txt")

metadata[1:6, ]
tot_df[, 1:7]
# Size factors are estimated on the numeric counts only.
estimateSizeFactorsForMatrix(as.matrix(tmp_df))

# tidy = TRUE treats the first column of countData as gene ids, so the gene_id
# column has to stay in tot_df. Dropping it first would make the first sample
# column be read as ids.
dds <- DESeqDataSetFromMatrix(countData = tot_df,
                              colData = metadata,
                              design = ~sample, tidy = TRUE)
####201024
# Highly variable genes: fit CV^2 ~ a1/mean + a0 on size-factor-normalised
# counts, then rank genes by observed variance over fitted variance.

# Work on a numeric matrix with gene ids as row names. Non-numeric columns
# (such as an id column) are ignored. Keeping a tibble here would lose the row
# names when rows are subset, which left the exported gene list empty.
counts <- as.matrix(tmp_df[, vapply(tmp_df, is.numeric, logical(1)), drop = FALSE])
rownames(counts) <- id$gene_id
counts <- counts[rowSums(counts) > 0, , drop = FALSE]
# nrow() gives the number of genes. length(counts[, 1]) is 1 on a tibble.
nGenes <- nrow(counts)

lib.size <- estimateSizeFactorsForMatrix(counts)
ed <- t(t(counts) / lib.size)

means <- rowMeans(ed)
vars <- apply(ed, 1, var)
cv2 <- vars / means^2 ##squared coefficient of variation
par(mar = c(3.5, 3.5, 1, 1), mgp = c(2, 0.65, 0), cex = 0.9)
smoothScatter(log(means), log(cv2))

# Fit only on genes whose mean is at least the 95th percentile of the means of
# genes with cv2 > 0.3.
minMeanForFit <- unname(quantile(means[which(cv2 > .3)], .95))
useForFit <- means >= minMeanForFit # & spikeins
fit <- glmgam.fit(cbind(a0 = 1, a1tilde = 1 / means[useForFit]), cv2[useForFit])
a0 <- unname(fit$coefficients["a0"])
a1 <- unname(fit$coefficients["a1tilde"])
fit$coefficients

# repeat previous plot
par(mar = c(3.5, 3.5, 1, 1), mgp = c(2, 0.65, 0), cex = 0.9)
smoothScatter(log(means), log(cv2))
xg <- exp(seq(min(log(means[means > 0])), max(log(means)), length.out = 1000))
vfit <- a1 / xg + a0
# add fit line
lines(log(xg), log(vfit), col = "black", lwd = 3)
# Degrees of freedom of the chi-square reference. This overwrites any earlier
# object called `df`.
df <- ncol(ed) - 1
# add confidence interval
lines(log(xg), log(vfit * qchisq(0.975, df) / df), lty = 2, col = "black")
lines(log(xg), log(vfit * qchisq(0.025, df) / df), lty = 2, col = "black")

afit <- a1 / means + a0
varFitRatio <- vars / (afit * means^2)
varorder <- order(varFitRatio, decreasing = TRUE)
oed <- ed[varorder, ]
# save for the next exercise
save(oed, file = "oed.RData")

# repeat previous plot
par(mar = c(3.5, 3.5, 1, 1), mgp = c(2, 0.65, 0), cex = 0.9)
smoothScatter(log(means), log(cv2))
lines(log(xg), log(vfit), col = "black", lwd = 3)
lines(log(xg), log(vfit * qchisq(0.975, df) / df), lty = 2, col = "black")
lines(log(xg), log(vfit * qchisq(0.025, df) / df), lty = 2, col = "black")
# Highlight the top-ranked genes.
# NOTE(review): the original comment says "top 100" but 101 genes are taken.
# The count is kept at 101 and capped at the number of genes available.
top_idx <- seq_len(min(101, nrow(oed)))
points(log(means[varorder[top_idx]]), log(cv2[varorder[top_idx]]), col = 2)

pval <- pchisq(varFitRatio * df, df = df, lower.tail = FALSE)
adj.pval <- p.adjust(pval, "fdr")
sigVariedGenes <- adj.pval < 1e-3
table(sigVariedGenes)

m <- oed[top_idx, , drop = FALSE]
#rownames(m)
m
heatmap(m / apply(m, 1, max), zlim = c(0, 1), col = gray.colors(100), Rowv = NA, Colv = NA, labRow = NA, scale = "none", ColSideColors = ifelse(grepl("d", colnames(m)), "red", "blue"))

# Export the gene names with everything up to each underscore removed.
write.table(gsub("[A-Z,a-z,0-9]*_", "", rownames(m)),
            "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/integrate/rsem/de.hv.genes.txt",
            quote = FALSE,
            row.names = FALSE)




# Shorten the row names of `m`, then draw the log2 heatmap with the gene names
# as a rotated text annotation on the left.
rownames(m) <- gsub("[A-Z,a-z,0-9]*_", "", rownames(m))

cn <- rownames(m)
Heatmap(
  log(m + 0.001, base = 2),
  show_row_names = FALSE,
  show_row_dend = FALSE,
  left_annotation = rowAnnotation(
    text = anno_text(
      cn,
      rot = 45,
      location = unit(1, "npc"),
      just = "right",
      gp = gpar(fontsize = 5)
    ),
    annotation_height = max_text_width(cn)
  )
)


m
# Same matrix without the log transform or the text annotation.
Heatmap(m)
lib.size
# Total count per sample divided by the number of genes.
coverage <- colSums(counts) / nGenes
ord <- order(groups)
coverage






# Size-factor normalisation restricted to columns 2 to 11 of tot_df.
rownames(tot_df) <- tot_df$gene_id

count_df <- tot_df[, 2:11]
count_df

lib.size <- estimateSizeFactorsForMatrix(count_df)
lib.size
# Divide each sample column by its size factor. The double transpose makes the
# division run along columns.
ed <- t(t(count_df) / lib.size)
ed
#####







# Variance-stabilised values for a small p53 / cell-cycle panel.
vsd <- vst(dds)
vsd
assay(vsd)
grep("TP53$", rownames(assay(vsd)), value = TRUE)

# Patterns are anchored at the end of the row name.
gene_list <- c("TP53$", "CDKN1A$", "CDKN1B$", "CCND1$", "CDK4$", "CCNE1$", "CDK2$", "CCNA1$", "MKI67$")
x <- grep(paste(gene_list, collapse = "|"), rownames(assay(vsd)), value = TRUE)

test <- as.matrix(assay(vsd)[x, ])
2^test
ComplexHeatmap::Heatmap(test, name = "mat")

barplot(2^(assay(vsd)[x, ]))



# Inspect the inputs, then fit DESeq2 on the full data set and keep the result
# table as a plain data frame (gene ids in column `row`).
normalized_counts
counts(dds) %>% View()
#tot_df+1
dds <- DESeq(dds)
res <- dds %>% results(tidy = TRUE)
res

res

# Select the significant genes from a differential-expression result table.
#
# Args:
#   de.test.result: data frame with columns `padj` and `log2FoldChange` plus a
#     p-value column named `pvalue` (or legacy `pval`), e.g. the output of
#     results(..., tidy = TRUE).
#   alpha: adjusted p-value cut-off; rows with padj < alpha are kept.
#
# Returns:
#   Columns 1, 2 and 6 of the kept rows, ordered by increasing p-value. Rows
#   whose padj is NA are dropped, as are rows with an infinite or NaN
#   log2FoldChange.
#   NOTE(review): for a tidy DESeq2 table, column 6 is `pvalue`, not the
#   log2 fold change that the original comment mentioned. The positional
#   selection is kept for backward compatibility.
find.significant.genes <- function(de.test.result, alpha = 0.05) {
  # which() discards NA comparisons. Plain logical indexing would turn every
  # NA padj into a row made entirely of NA.
  keep <- which(
    de.test.result$padj < alpha &
      !is.infinite(de.test.result$log2FoldChange) &
      !is.nan(de.test.result$log2FoldChange)
  )
  filtered <- de.test.result[keep, , drop = FALSE]
  # Look the p-value column up by its full name, so this also works on
  # tibbles, which do not partial-match `$pval`.
  p_col <- intersect(c("pvalue", "pval"), names(filtered))[1]
  filtered[order(filtered[[p_col]]), c(1, 2, 6)]
}
# Significant genes across all samples, best p-value first, with the id prefix
# removed. The top 500 are written to disk.
de.genes <- find.significant.genes(res)
de.genes.names <- de.genes %>% dplyr::select(row)
de.genes.names$row <- gsub("[A-Z,0-9]*_", "", de.genes.names$row)
head(de.genes.names, n = 500) %>% View()
# head() returns at most 500 names and never pads with NA.
head(de.genes.names$row, 500)
# NOTE(review): this writes to the same de.hv.genes.txt path as the
# highly-variable-gene export earlier in the script and overwrites it.
write.table(head(de.genes.names, n = 500),
            "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/integrate/rsem/de.hv.genes.txt",
            quote = FALSE,
            row.names = FALSE)

grep("mCherry", de.genes.names$row)

# An unfinished call, `gsub(de.genes.names$row`, stood here. Its parenthesis
# was never closed, which made the R parser reject everything after it. It is
# kept as a comment because the intended pattern and replacement are unknown.
#gsub(de.genes.names$row
     #significant.genes <- which(p.values.adj<0.05) %>%select(row)
     #de.genes.pooled <- find.significant.genes(de.test.pooled)

     # First rows of the result table and the dispersion estimates of the fit.
     results(dds, tidy = TRUE) %>% head()
     plotDispEsts(dds)
     ## volcano plot
     # All genes in the default colour, padj < 0.01 in blue, and those that
     # also have |log2FoldChange| > 2 in red.
     par(mfrow = c(1, 1))
     with(res, plot(log2FoldChange, -log10(pvalue), pch = 20, main = "Volcano plot", xlim = c(-3, 3)))
     with(
       res[which(res$padj < .01), ],
       points(log2FoldChange, -log10(pvalue), pch = 20, col = "blue")
     )
     with(
       res[which(res$padj < .01 & abs(res$log2FoldChange) > 2), ],
       points(log2FoldChange, -log10(pvalue), pch = 20, col = "red")
     )
     ## volcano plot.v2
     # Build the plotting table column by column from `res` itself. Numbers
     # stay numeric, and each label comes from the same row as its statistics.
     #vol_df<-cbind(res$row,res$log2FoldChange,res$pvalue)
     vol_df <- tibble(
       gene_id = gsub("[A-Z,0-9]*_", "", res$row),
       log2FoldChange = res$log2FoldChange,
       pvalue = res$pvalue
     )
     vol_df

     ggplot(vol_df, aes(x = log2FoldChange, y = -log10(pvalue))) +
       geom_point() +
       geom_text(aes(label = gene_id), hjust = 0, vjust = -1, size = 2)
     # The axis limits were already disconnected from the plot, because the "+"
     # before them was commented out. Left on their own they form a separate
     # expression that fails when evaluated, so they are commented out too.
     # To restore them, append to the plot:
     #   + xlim(-5,5) + ylim(0,50)
     
     
     #  geom_text(aes(label=gene_id,hjust=0,vjust=0))
     ##PCA
     # PCA of the variance-stabilised data, coloured by `condition`, followed
     # by a heatmap of the 500 most variable genes.
     vsd <- vst(dds, blind = FALSE)
     # The original call was pltPCA(vsdata, ...). Neither name is defined
     # anywhere in the visible script. The transformed object here is `vsd`
     # and the DESeq2 function is plotPCA().
     plotPCA(vsd, intgroup = "condition")
     library("genefilter")

     library("pheatmap")
     library("RColorBrewer")
     # Indices of the 500 rows with the largest variance across samples.
     topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 500)
     mat <- assay(vsd)[topVarGenes, ]
     rownames(mat)
     # Centre each gene on its own mean so the heatmap shows deviations.
     mat <- mat - rowMeans(mat)
     anno <- as.data.frame(colData(vsd)[, c("id", "condition")])
     pheatmap(mat, annotation_col = anno, fontsize_row = 5)
     
     ##results
     # Result table sorted by adjusted p-value and joined, gene by gene, to
     # the size-factor-normalised counts.
     res <- results(dds)
     table(res$padj < 0.05)
     ## Order by adjusted p-value
     res <- res[order(res$padj), ]
     ## Merge with normalized count data
     resdata <- merge(
       x = as.data.frame(res),
       y = as.data.frame(counts(dds, normalized = TRUE)),
       by = "row.names",
       sort = FALSE
     )
     # merge() puts the row names in the first column; call it "Gene".
     names(resdata)[1] <- "Gene"
     head(resdata)
     
     library(calibrate)
     # MA plot: log2 fold change against mean expression (log-scaled x axis).
     #
     # Args:
     #   res: data frame with columns baseMean, log2FoldChange, padj and, when
     #     labelsig is TRUE, Gene.
     #   thresh: rows with padj < thresh are highlighted in red.
     #   labelsig: if TRUE, highlighted points are labelled with `Gene` via
     #     calibrate::textxy().
     #   textcx: character expansion of the labels.
     #   ...: passed on to plot().
     maplot <- function (res, thresh=0.05, labelsig=TRUE, textcx=1, ...) {
       with(res, plot(baseMean, log2FoldChange, pch=20, cex=.5, log="x", ...))
       with(subset(res, padj<thresh), points(baseMean, log2FoldChange, col="red", pch=20, cex=1.5))
       if (labelsig) {
         # Fail with a clear message if the optional package is missing.
         # require() only returns FALSE, so the failure would surface later as
         # a missing function.
         if (!requireNamespace("calibrate", quietly = TRUE)) {
           stop("maplot(labelsig = TRUE) needs the 'calibrate' package", call. = FALSE)
         }
         with(subset(res, padj<thresh), calibrate::textxy(baseMean, log2FoldChange, labs=Gene, cex=textcx, col=2))
       }
     }
     
     # MA plot without labels. This definition replaces the earlier `maplot`.
     # `labelsig` and `textcx` are still accepted, but nothing here uses them.
     #
     # Args:
     #   res: data frame with columns baseMean, log2FoldChange and padj.
     #   thresh: rows with padj < thresh are drawn again as larger red points.
     #   ...: passed on to plot().
     maplot <- function (res, thresh=0.05, labelsig=TRUE, textcx=1, ...) {
       with(res, plot(baseMean, log2FoldChange, pch = 20, cex = .5, log = "x", ...))
       # which() drops rows whose padj is NA, as subset() does.
       hits <- res[which(res$padj < thresh), ]
       points(hits$baseMean, hits$log2FoldChange, col = "red", pch = 20, cex = 1.5)
     }
     #maplot
     library(tidyverse)
     library(dplyr)
     # MA plot of the merged table, then append the strongly changed genes
     # (padj < 0.05, baseMean > 50, |log2FoldChange| > 2.5) to gene_list.txt.
     maplot(resdata, main = "MA Plot")
     gene_df <- resdata %>%
       filter(padj < 0.05, baseMean > 50) %>%
       filter(abs(log2FoldChange) > 2.5) %>%
       dplyr::select(Gene)
     gene_df

     # append = TRUE adds to the file instead of replacing it.
     write.table(
       gsub("[A-Z,0-9]*_", "", gene_df$Gene),
       "/home/users/ayh/Projects/27_A3B/03_sequencing/PMA_RNAseq/gene_list.txt",
       row.names = FALSE,
       quote = FALSE,
       append = TRUE,
       na = "NA"
     )
     
     ##complex heatmap
     # Build the matrix `mat` (genes x samples) of variance-stabilised values
     # for the first 100 genes listed in the p53 gene list file.
     cell_gene_list <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/PMA_RNAseq/p53_gene_list.txt")
     cell_gene_list
     dds_tmp <- estimateSizeFactors(dds)
     # The size-factor-normalised counts were computed here and immediately
     # overwritten. The values actually used are the VST values.
     #normalized_counts<-counts(dds_tmp,normalized=TRUE)
     normalized_counts <- assay(vsd)
     temp_df <- normalized_counts
     temp_df
     # as_tibble() replaces the deprecated as_data_frame().
     temp_df <- temp_df %>% as_tibble()
     colnames(temp_df) <- gsub(".genes.results", "", basename(files_to_read))
     temp2_df <- temp_df %>% dplyr::mutate(gene_id = id$gene_id)
     temp3_df <- temp2_df %>% dplyr::select(gene_id, everything())
     temp3_df
     # The name was misspelled `cell_Gene_list` here, which is an undefined
     # object and stopped the script.
     cell_gene_list
     temp4_df <- temp3_df %>% dplyr::filter(gsub("[A-Z,0-9]*_", "", gene_id) %in% head(cell_gene_list$Gene, 100))
     temp4_df$gene_id <- gsub("[A-Z,0-9]*_", "", temp4_df$gene_id)
     temp4_df
     # Columns 2 to 10 are taken as the sample columns. gene_id is column 1.
     temp5_df <- temp4_df[2:10]
     mat <- data.matrix(temp5_df)
     # Row names are set on the matrix directly. Tibbles do not keep them.
     rownames(mat) <- temp4_df$gene_id
     mat
     ###heatmap
     # Draw `mat` as a ComplexHeatmap with a custom colour scale and a
     # separately built legend.
     library(ComplexHeatmap)
     #Heatmap(mat)
     library(circlize)
     mat
     # Body colour scale: 0 -> blue, 8 -> white, 15 -> red.
     hm_col_fun=colorRamp2(c(0,8,15),c("blue","white","red"))
     # Columns are kept in their existing order (column_order = colnames(mat)).
     # Rows are clustered with the "single" method.
     # NOTE(review): the legend parameters below use a white -> red scale over
     # 0-12, which differs from hm_col_fun used for the body. Confirm that the
     # legend is meant to differ from the heatmap colours.
     hm<-Heatmap(mat, name = "mat", clustering_method_rows = "single", column_order = colnames(mat),col=hm_col_fun,
                 heatmap_legend_param =list(title="log2(normalized counts)",
                                            at = c(0,12),
                                            labels=c("0","12"),
                                            col_fun=circlize::colorRamp2(c(0,12),c("white","red")),
                                            legend_height=unit(3,"cm")
                                            
                 ))
     draw(hm)
     
     
     # Stand-alone legend built with the same title, breaks, labels, colour
     # function and height as the legend parameters above.
     lgd<-Legend(         title="log2(normalized counts)",
                          at = c(0,12),
                          labels=c("0","12"),
                          col_fun=circlize::colorRamp2(c(0,12),c("white","red")),
                          legend_height=unit(3,"cm"))
     # Redraw the heatmap, passing the stand-alone legend as an annotation
     # legend.
     draw(hm,annotation_legend_list=lgd)
     grid.rect()
     # Draw the legend on its own.
     draw(lgd)