library(RColorBrewer)
library(corrplot)
library(tidyverse)
library(pheatmap)

# ---- Run parameters ----
species <- "Fly"  # species tag spliced into the input/output file names
group <- "emb"    # stage-group tag spliced into the matrix/output file names
thresh <- 277     # clusters with more than this many peaks are discarded

# ---- Cluster table ----
# BED-style table of every peak cluster (all clusters, not only those of the
# stage group of interest). The file has no header row, so readr assigns
# X1..X9 and the columns are renamed explicitly afterwards.
clusterBed <- str_c(
  "/net/waterston/vol9/ChipSeqPipeline/All",
  species,
  "Peaks.TF.noDups.clusters.bed"
)

# Keep only the IDs of clusters whose peak count is in (1, thresh].
clustersDF <- read_tsv(clusterBed, col_names = FALSE) %>%
  rename(
    chromo = X1,
    start = X2,
    end = X3,
    cluster = X4,
    nPeaks = X5,
    strand = X6,
    apexS = X7,
    apexE = X8,
    color = X9
  ) %>%
  filter(nPeaks > 1, nPeaks <= thresh) %>%
  select(cluster)

# ---- TF occupancy matrix ----
# Cluster-by-TF occupancy table for the selected stage group: one row per
# cluster (column `cluster`), one column per TF.
tsv <- str_c("/net/waterston/vol9/ChipSeqPipeline/All", species, "Peaks.TF.noDups.clustered.ClusterBinaryMatrix_", group, ".tsv")
#tsv <- str_c("/net/waterston/vol9/ChipSeqPipeline/All",species,"Peaks.TF.noDups.clustered.ClusterBinaryMatrixByStage",".tsv") 
#tsv <- str_c("/net/waterston/vol9/ChipSeqPipeline/All",species,"Peaks.TF.noDups.clustered.ClusterBinaryMatrix",".tsv") 
df <- as.data.frame(read_tsv(file = tsv))

# Replace the non-ASCII alpha in this TF name. any_of() makes the rename a
# no-op when the column is absent (e.g. a different species or stage group)
# instead of aborting the script.
df <- rename(df, any_of(c("CkIIalpha-i1" = "CkIIα-i1")))

# Per-cluster count of occupied TFs; keep clusters where that count exceeds 1.
# Every column except `cluster` is treated as a TF column.
pl <- pivot_longer(df, cols = -cluster, names_to = "TF", values_to = "Occupancy")
g <- pl %>%
  dplyr::group_by(cluster) %>%
  dplyr::summarise(count = sum(Occupancy)) %>%
  filter(count > 1) %>%
  select(-count) %>%
  left_join(df, by = "cluster")
# NOTE(review): `g` is not referenced again in the visible part of the script;
# the join below uses `df`. Confirm whether `g` was the intended input.

# Restrict the occupancy table to the clusters that passed the nPeaks filter.
j <- inner_join(clustersDF, df, by = "cluster")

# Set the cluster IDs aside so that `j` holds only the TF columns.
cl <- j$cluster
j <- select(j, -cluster)

#df <- select(df,-cluster)

#df <- df %>% rowwise() %>% mutate(s = sum(c_across(1:length(df))))
#df <- filter(df,s > 1)  # remove clusters with only one tf
#df <- select(df,-s)


# ---- TF-by-TF Pearson correlation ----
# Columns of `j` are TFs, so cor() yields a TF x TF correlation matrix.
mat <- as.matrix(j)
pearson <- cor(mat, method = "pearson")
#row.names(pearson)

# Export the matrix with the TF names as an explicit first column.
pearsonDF <- as.data.frame(pearson) %>%
  add_column(TF = row.names(pearson), .before = 1)
# The file name carries the nPeaks threshold; it is built from `thresh` so
# the name always matches the filter that produced the data.
df_tsv <- str_c("/net/waterston/vol9/ChipSeqPipeline/All", species, "Peaks.TF.noDups.clustered.pearson.", thresh, group, ".tsv")
#df_tsv <- str_c("/net/waterston/vol9/ChipSeqPipeline/All",species,"Peaks.TF.noDups.clustered.pearson",".tsv") 
write_tsv(pearsonDF, file = df_tsv)

# Lower-triangle correlation plot, TFs ordered by hierarchical clustering,
# written to a 90 x 90 inch PDF.
pdfFile <- str_c("/net/waterston/vol9/ChipSeqPipeline/", species, "CorrByStage.", thresh, group, ".pdf")
#pdfFile <- str_c("/net/waterston/vol9/ChipSeqPipeline/",species,"CorrByStage",".pdf")
pdf(file = pdfFile, width = 90, height = 90)
corrplot(pearson, method = "color", type = "lower", diag = FALSE, order = "hclust")
dev.off()





# ---- Clustered heatmap of the correlation matrix ----
# Build 250 colours interpolated from the reversed 7-class RdYlBu palette,
# replace the first colour with grey, then keep that grey plus entries
# 71..250 (181 colours in total).
pal <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(250)
pal[1] <- "grey60"
pal <- pal[c(1, 71:250)]

# When `filename` is supplied, pheatmap() opens the output device, draws and
# closes it itself. No dev.off() must follow: it would either fail on the
# null device or close an unrelated device that happens to be open.
# NOTE(review): the output path is hard-coded to "Fly" and does not follow
# `species`/`group` - confirm whether that is intended.
heat <- pheatmap(
  pearson,
  scale = "none",
  clustering_method = "ward.D2",
  color = pal,
  onefile = TRUE,
  width = 100,
  height = 100,
  filename = "/net/waterston/vol9/ChipSeqPipeline/AllFlyPeaks.TF.noDups.clustered.ClusterBinaryMatrix.84.pdf"
)



# ---- Peak signal-rank table ----
# Load the per-species clusters/targets object.
file <- str_c("/net/waterston/vol6/", species, "TFPeaks_clusters_targets.rds")
k <- readRDS(file)

# Build a two-column table (peakID, rank) from `rankPeak`.
# NOTE(review): `rankPeak` is not defined in the visible part of this script;
# presumably it exists in the session or is created elsewhere - confirm.
# NOTE(review): peakID concatenates exp, chromo and start with no separator
# (only ":" before end) - verify this is the intended ID format.
s <- rankPeak %>%
  select(chromo, start, end, exp, rank) %>%
  mutate(peakID = str_c(exp, chromo, start, ":", end)) %>%
  select(peakID, rank)

saveRDS(
  s,
  file = str_c("/net/waterston/vol6/", species, "SignalRank.rds")
)

# Worm clusters/targets object, loaded from a fixed path regardless of `species`.
f <- readRDS(file = "/net/waterston/vol6/WormTFPeaks_clusters_targets.rds")

# ---- Interactive plots ----
# Same clustered heatmap as the PDF version, drawn on the current device
# because no `filename` is given.
pheatmap(
  pearson,
  color = pal,
  scale = "none",
  clustering_method = "ward.D2",
  onefile = TRUE,
  width = 50,
  height = 50
)

# Open a new X11 window; this happens after the heatmap above was drawn.
x11(width = 100, height = 100)


# NOTE(review): `c` is not assigned anywhere in the visible script, so this
# presumably relies on an object created in the interactive session - confirm.
pheatmap(
  c,
  width = 500,
  height = 200
)


