#looking for marker genes from the DEGage_C data
setwd("~/Documents/DEGage_stuff/DEGage_Testing/Heidegger")

library(reshape2)
library(ggplot2)
library(ggpubr)
library(patchwork)
library(Seurat)
#Truncated count processing for future seurat-generated figures - taken from "Heidegger analysis.R"
# Every entry under data/ is treated as one 10X sample directory.
samples <- list.files("data/")
if (length(samples) < 2) {
  stop("Expected at least two sample directories under data/, found ",
       length(samples), call. = FALSE)
}

# Read each sample and wrap it in a Seurat object (index printed as progress).
sample.list <- lapply(seq_along(samples), function(i) {
  print(i)
  CreateSeuratObject(Read10X(data.dir = paste0("data/", samples[i], "/")))
})

# Merge all samples, however many there are; cell names are prefixed with the
# sample directory name via add.cell.ids.
combined <- merge(sample.list[[1]], y = sample.list[-1], add.cell.ids = samples)

rm(sample.list)

# QC: percentage of counts from features matching ^MT-, then keep cells with
# 200 < nFeature_RNA < 8000 and percent.mt < 20.
combined[["percent.mt"]] <- PercentageFeatureSet(combined, pattern = "^MT-")
combined <- subset(combined, subset = nFeature_RNA > 200 & nFeature_RNA < 8000 & percent.mt < 20)

combined <- NormalizeData(combined)
# Scaling is applied to every feature, so it does not depend on the
# variable-feature selection that follows.
combined <- ScaleData(combined, features = rownames(combined))
combined <- FindVariableFeatures(object = combined)


# Identifying cell types using SingleR against the Human Primary Cell Atlas
# reference (main labels).
library(SingleR)
celltypesall <- as.SingleCellExperiment(combined)
hpca.se <- celldex::HumanPrimaryCellAtlasData()
pred.all <- SingleR(test = celltypesall, ref = hpca.se,
                    labels = hpca.se$label.main)

# Filtering out junk cells: labels assigned to fewer than 20 cells, plus
# everything labelled "iPS_cells".
pred.table <- table(pred.all$labels)
celllist <- levels(factor(pred.all$labels))
too.few <- c(celllist[as.vector(pred.table) < 20], "iPS_cells")
badcells <- which(pred.all$labels %in% too.few)

# Subset with a logical keep-mask rather than negative indices: when nothing
# matches, `-integer(0)` would select zero cells instead of keeping them all.
keep.cells <- !(pred.all$labels %in% too.few)
combined <- combined[, keep.cells]
pred.all <- pred.all[keep.cells, ]
combined$celltype <- pred.all$labels

celltypes <- levels(factor(combined$celltype))
Idents(combined) <- combined$celltype

# The third character of each cell name (which starts with the sample id added
# at merge time) flags the group: "n" or "t".
# NOTE(review): presumably "n" = negative/healthy and "t" = tumor - confirm.
group.flag <- substr(colnames(combined), 3, 3)
ni <- which(group.flag == "n")
ti <- which(group.flag == "t")

# 1 for "n" cells, 0 for everything else.
group.pos.neg <- as.numeric(seq_len(ncol(combined)) %in% ni)

neg_seurat <- combined[, group.pos.neg == 1]
pos_seurat <- combined[, group.pos.neg == 0]

## LOADING IN DATA ##################################################################################

# Cell-type labels as they appear in the DEGage result file names. This
# replaces any earlier value of `celltypes`.
celltypes <- c(
  "B_cell",
  "Chondrocytes",
  "CMP",
  "DC",
  "Endothelial_cells",
  "Epithelial_cells",
  "Fibroblasts",
  "Hepatocytes",
  "HSC_CD34+",
  "Macrophage",
  "Monocyte",
  "Neutrophils",
  "NK_cell",
  "Pre-B_cell_CD34-",
  "Smooth_muscle_cells",
  "T_cells",
  "Tissue_stem_cells"
)

# Display names used on figures; position i corresponds to celltypes[i].
celltypes_for_figures <- c(
  "B cells",
  "Chondrocytes",
  "CMPs",
  "Dendritic cells",
  "Endothelial cells",
  "Epithelial cells",
  "Fibroblasts",
  "Hepatocytes",
  "HSC CD34+ Cells",
  "Macrophages",
  "Monocytes",
  "Neutrophils",
  "NK cells",
  "Pre-B cells CD34-",
  "Smooth muscle cells",
  "T cells",
  "Tissue stem cells"
)

# Significance cutoffs applied to the FDR and permPvals columns of each
# DEGage result table.
FDR <- 0.05
permpval <- 0.05

# Reads one DEGage result CSV and keeps only complete, significant rows.
#   path        - file to read (comma separated; first column `X` holds gene names)
#   fdr_cutoff  - rows are kept when FDR < fdr_cutoff
#   perm_cutoff - rows are kept when permPvals < perm_cutoff
# Returns a data frame with genes as row names.
read_degage_table <- function(path, fdr_cutoff, perm_cutoff) {
  res <- read.delim(path, sep = ",")
  rownames(res) <- res$X
  res <- res[, -1, drop = FALSE]
  res <- res[!(is.na(res$mu1) | is.na(res$mu2)), , drop = FALSE]
  res <- res[!is.na(res$pval), , drop = FALSE]
  # which() discards NA comparisons; a bare logical index would turn each NA
  # into an all-NA row that then gets counted as a DEG.
  res <- res[which(res$FDR < fdr_cutoff), , drop = FALSE]
  res[which(res$permPvals < perm_cutoff), , drop = FALSE]
}

# Loads the result table for every unordered pair of cell types.
# The file for a pair is the first one whose name matches both labels (labels
# are used as regular expressions). The table is stored at that file's position
# in `files`, which is how later sections look it up.
# NOTE(review): "B_cell" also matches "Pre-B_cell_CD34-" file names, so the
# first-match rule relies on the file ordering - confirm against the directory.
load_pairwise_results <- function(result_dir, files, celltypes, fdr_cutoff, perm_cutoff) {
  results <- list()
  pairs <- combn(seq_along(celltypes), 2)
  for (p in seq_len(ncol(pairs))) {
    first <- celltypes[pairs[1, p]]
    second <- celltypes[pairs[2, p]]
    index <- which(grepl(first, files) & grepl(second, files))[1]
    if (is.na(index)) {
      stop("No result file in ", result_dir, " for ", first, " vs ", second,
           call. = FALSE)
    }
    results[[index]] <- read_degage_table(paste0(result_dir, files[index]),
                                          fdr_cutoff, perm_cutoff)
  }
  results
}

#positive cell data
posfiles <- list.files("DEGage_C/pos_celltype_comparisons/")
poslist <- load_pairwise_results("DEGage_C/pos_celltype_comparisons/", posfiles,
                                 celltypes, FDR, permpval)

#negative cell data
negfiles <- list.files("DEGage_C/neg_celltype_comparisons/")
neglist <- load_pairwise_results("DEGage_C/neg_celltype_comparisons/", negfiles,
                                 celltypes, FDR, permpval)

#posvneg comparisons
posvnegfiles <- list.files("DEGage_C/pos_v_neg_by_celltype/")
posvneglist <- list()

for (i in seq_along(celltypes)) {
  index <- which(grepl(celltypes[i], posvnegfiles))[1]
  if (is.na(index)) {
    stop("No pos_v_neg_by_celltype file for ", celltypes[i], call. = FALSE)
  }
  # Stored by cell-type position (not file position): every later reader of
  # posvneglist indexes it in parallel with celltypes / celltypes_for_figures.
  posvneglist[[i]] <- read_degage_table(
    paste0("DEGage_C/pos_v_neg_by_celltype/", posvnegfiles[index]),
    FDR, permpval
  )
}



## LOOKING AT NUMBER OF DEGS ###################################################################################

#pos and neg cells separately

# Builds a square matrix of DEG counts between cell types.
#   result_list - filtered result tables, indexed by position in `files`
#   files       - file names the tables were loaded from
#   celltypes   - labels used to locate each pair's file
#   labels      - display names used as row and column names
# Only the upper triangle (row i, column j with i < j) is filled; the rest is 0.
count_pairwise_degs <- function(result_list, files, celltypes, labels) {
  n <- length(celltypes)
  counts <- matrix(0, nrow = n, ncol = n, dimnames = list(labels, labels))
  for (i in seq_len(n - 1)) {
    for (j in (i + 1):n) {
      index <- which(grepl(celltypes[i], files) & grepl(celltypes[j], files))[1]
      counts[i, j] <- nrow(result_list[[index]])
    }
  }
  counts
}

postempmatrix <- count_pairwise_degs(poslist, posfiles, celltypes, celltypes_for_figures)
melted_postempmatrix <- melt(postempmatrix)

negtempmatrix <- count_pairwise_degs(neglist, negfiles, celltypes, celltypes_for_figures)
melted_negtempmatrix <- melt(negtempmatrix)

# Draws one tile heatmap of pairwise DEG counts from a melted count matrix
# (columns Var1, Var2, value) with the given title.
deg_count_heatmap <- function(melted_counts, title) {
  ggplot(melted_counts, aes(x = Var1, y = Var2, fill = value)) +
    geom_tile() +
    scale_fill_gradient(low = "white", high = "steelblue") +
    guides(fill = guide_colourbar(title = "# of DEGs")) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          plot.title = element_text(hjust = 0.5)) +
    labs(x = element_blank(), y = element_blank()) +
    ggtitle(title)
}

posplot <- deg_count_heatmap(melted_postempmatrix, "Tumor Cells")
negplot <- deg_count_heatmap(melted_negtempmatrix, "Healthy Cells")

# Side-by-side panels sharing a single legend on the right.
ndeg_heatmap <- ggarrange(posplot, negplot, ncol = 2, common.legend =  TRUE, legend = "right")

#posvneg comparisons
# Number of significant genes in each pos-vs-neg table. Entries are paired
# with celltypes_for_figures by position.
posvneg_ndegs <- vapply(posvneglist, nrow, integer(1))

posvneg_ndeg_plotdf <- data.frame(cell = celltypes_for_figures, ndegs = posvneg_ndegs)
# Most DEGs first; this ordering is reused below to fix the bar order.
deg_rank <- order(posvneg_ndeg_plotdf$ndegs, decreasing = TRUE)
posvneg_ndeg_plotdf <- posvneg_ndeg_plotdf[deg_rank, ]

posvneg_ndeg_barplot <- ggplot(posvneg_ndeg_plotdf,
                               aes(x = factor(cell, levels = cell), y = ndegs)) +
  geom_col(fill = "grey", color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(hjust = 0.5)) +
  labs(x = element_blank(), y = "# of Degs")



## LOOKING FOR MARKERS IN POSVNEG ##################################################################

#params for analysis
nHVCT <- 6 # number of highly variable cell types
nsame_HVCT <- 2 # number of cell types a marker must be present in

#retrieving degs from HVCTs

# The HVCTs are the first nHVCT rows of posvneg_ndeg_plotdf (sorted by DEG
# count). match() finds each display name exactly; a regex lookup would make
# "B cells" also hit "Pre-B cells CD34-" and "HSC CD34+ Cells" hit nothing,
# because "+" is a regex quantifier.
HVCTs <- match(head(posvneg_ndeg_plotdf$cell, nHVCT), celltypes_for_figures) #index of HVCTs in celltypes
HVCT_degs <- lapply(HVCTs, function(idx) rownames(posvneglist[[idx]])) #degs in list separated by celltype
posvneg_markers <- unlist(HVCT_degs) #at this point, just contains all degs

#looking for genes that are in at least nsame_HVCT number of celltypes
posvneg_marker_table <- table(posvneg_markers)
posvneg_marker_table <- posvneg_marker_table[posvneg_marker_table >= nsame_HVCT]
posvneg_markers <- names(posvneg_marker_table)

#prepping Seurat object

# Rewrites the identities of `obj` for plotting: appends `suffix`, replaces
# underscores with spaces, then expands "DC" to "Dendritic Cells". The three
# steps are applied one after another, each through Idents<-.
relabel_idents <- function(obj, suffix) {
  Idents(obj) <- paste(Idents(obj), suffix, sep = "")
  Idents(obj) <- gsub("_", " ", Idents(obj))
  Idents(obj) <- gsub("DC", "Dendritic Cells", Idents(obj))
  obj
}

# Keep only the highly variable cell types in each group, then tag the group.
pos_posvneg_seurat <- relabel_idents(subset(pos_seurat, idents = celltypes[HVCTs]), "-Pos")
neg_posvneg_seurat <- relabel_idents(subset(neg_seurat, idents = celltypes[HVCTs]), "-Neg")

# Combined object used by the violin and dot plots below.
z <- JoinLayers(merge(pos_posvneg_seurat, neg_posvneg_seurat))

#violin plot
# Hand-picked genes for the stacked violin, shown in alphabetical order.
select_genes <- sort(c("NPY", "WDR13", "TESC", "CACNA1D", "COA6",
                       "FXYD6", "KCNE4", "LURAP1L", "MT1M", "OLFM2",
                       "PARVA", "PDE3A", "TCIM", "HOXB13", "AKR1C3",
                       "GRB10", "MT1H", "SPON2", "THSD7A"))

centered_title <- element_text(hjust = 0.5)

#posvneg_markers <- posvneg_markers[!(posvneg_markers %in% select_genes)]
marker_violin <- VlnPlot(z, features = select_genes, stack = TRUE, same.y.lims = TRUE) +
  theme(legend.position = "none", plot.title = centered_title) +
  labs(y = element_blank())
#VlnPlot(z, features = posvneg_markers, stack = TRUE, same.y.lims = TRUE)+
#    theme(legend.position = "none",plot.title = element_text(hjust = 0.5))+
#    labs(y = element_blank())

# Dot plot of every shared pos-vs-neg marker; shown but not stored.
DotPlot(z, features = posvneg_markers) +
  theme(axis.text.x = element_text(angle=60, vjust=1, hjust=1),
        plot.title = centered_title) +
  labs(x = element_blank(), y = element_blank())


## LOOKING FOR MARKERS IN POS AND NEG SEPARATELY #####################################################

#params for analysis
nsame <- 8 # a gene must be a DEG in MORE than this many comparisons

# Collects candidate marker genes for every cell type.
#   result_list   - filtered result tables, indexed by position in `files`
#   files         - comparison file names
#   celltypes     - labels used (as regular expressions) to find each type's files
#   min_exclusive - a gene is kept when it appears in more than this many tables
# Returns a list with one element per cell type: a character vector of gene
# names (genes containing "MT-" removed), or NA when no gene passes the count.
collect_celltype_markers <- function(result_list, files, celltypes, min_exclusive) {
  lapply(celltypes, function(ct) {
    k <- grep(ct, files)
    if (ct == "B_cell") {
      # "B_cell" also matches the Pre-B file names; drop those. The positions
      # found inside files[k] are mapped back through k so they stay valid
      # indices into result_list whatever the file order is.
      # NOTE(review): this also drops any B_cell-vs-Pre-B file, since its name
      # would contain "Pre" - confirm that is intended.
      k <- k[grep("Pre", files[k], invert = TRUE)]
    }

    genes <- unlist(lapply(k, function(idx) rownames(result_list[[idx]])))
    if (is.null(genes)) {
      genes <- character(0)
    }
    gene_counts <- table(genes)
    gene_counts <- gene_counts[gene_counts > min_exclusive]
    if (length(gene_counts) < 1) {
      return(NA)
    }
    markers <- names(gene_counts)
    markers[!grepl("MT-", markers)]
  })
}

#getting markers for pos cells
posmarkerlist <- collect_celltype_markers(poslist, posfiles, celltypes, nsame)
posmarkervec <- unique(unlist(posmarkerlist))
posmarkervec <- posmarkervec[!is.na(posmarkervec)]

#getting markers for neg cells
negmarkerlist <- collect_celltype_markers(neglist, negfiles, celltypes, nsame)
negmarkervec <- unique(unlist(negmarkerlist))
negmarkervec <- negmarkervec[!is.na(negmarkervec)]

#Supplementary marker gene plot - positive
# One DotPlot per cell type that has at least one marker gene. Plots are
# numbered in the order they are created; `plot` is the next free slot.
posplotlist <- list()
plot <- 1
widths <- c()

pos_selectgenes <- list()
blacklist_genes <- c("MALAT1")

# Plot numbers that keep their y-axis labels; all other plots hide them.
breaks <- c( 1, 4, 7, 11, 13, 16, 17)
for (i in seq_along(posmarkerlist)) {
  # Drop blacklisted genes and sort; the cleaned vector is written back.
  genes <- posmarkerlist[[i]]
  genes <- genes[!(genes %in% blacklist_genes)]
  genes <- genes[order(genes, decreasing = FALSE)]
  posmarkerlist[[i]] <- genes

  # Nothing to draw: empty vector, or the single-NA "no markers" placeholder.
  if (length(genes) == 0 || (length(genes) == 1 && is.na(genes))) {
    next
  }

  marker_plot <- DotPlot(pos_seurat, features = genes, dot.min = 0, col.min = 0, col.max = 2, scale.min = 0, scale.max = 100) +
    theme(axis.text.x = element_text(angle=60, vjust=1, hjust=1),
          plot.title = element_text(hjust = 0.5)) +
    labs(x = element_blank(), y = element_blank()) +
    ggtitle(celltypes_for_figures[i])
  if (!(plot %in% breaks)) {
    marker_plot <- marker_plot + theme(axis.text.y = element_blank())
  }

  posplotlist[[plot]] <- marker_plot
  plot <- plot + 1
  widths <- c(widths, length(genes))
}

#rough plot
#ggarrange(plotlist = posplotlist, common.legend = TRUE)

#format for when decided for final plot
# Rows are built first, then stacked top to bottom with a shared legend.
# Plot numbers are fixed by hand and depend on which cell types produced a plot.
posmarkerplot_all <- local({
  row1 <- posplotlist[[1]] + posplotlist[[2]] + posplotlist[[3]] + plot_layout(widths = c(.8,1.2,1))
  row2 <- posplotlist[[4]] + posplotlist[[5]] + posplotlist[[6]] + plot_layout( widths = c(1,1,1))
  row3 <- posplotlist[[7]] + posplotlist[[10]] + plot_layout( widths = c(1,2))
  row4 <- posplotlist[[11]] + posplotlist[[12]] + plot_layout( widths = c(2,1))
  row5 <- posplotlist[[13]] + posplotlist[[15]] + plot_layout( widths = c(1,2))
  row1 / row2 / row3 / row4 / row5 /
    posplotlist[[16]] / posplotlist[[17]] +
    plot_layout(guides = "collect")
})

#ggsave("~/Documents/DEGage_Testing/cdf_data/Heidegger/posmarkers_all.svg", plot = posmarkerplot_all, width =20 , height = 30 ,units ="in" )
#ggsave("~/Documents/DEGage_Testing/cdf_data/Heidegger/posmarkers_all.pdf", plot = posmarkerplot_all, width =20 , height = 30 ,units ="in" )


#supplementary marker gene plot - negative
# One DotPlot per cell type that has at least one marker gene. Plots are
# numbered in the order they are created; `plot` is the next free slot.
negplotlist <- list()
plot <- 1
widths <- c()

neg_selectgenes <- list()
blacklist_genes <- c("MALAT1")
# Plot numbers that keep their y-axis labels; all other plots hide them.
breaks <- c( 1, 4, 7, 11, 13, 16, 17)
for (i in seq_along(negmarkerlist)) {
  # Drop blacklisted genes and sort; the cleaned vector is written back.
  genes <- negmarkerlist[[i]]
  genes <- genes[!(genes %in% blacklist_genes)]
  genes <- genes[order(genes, decreasing = FALSE)]
  negmarkerlist[[i]] <- genes

  # Nothing to draw: empty vector, or the single-NA "no markers" placeholder.
  if (length(genes) == 0 || (length(genes) == 1 && is.na(genes))) {
    next
  }

  marker_plot <- DotPlot(neg_seurat, features = genes, dot.min = 0, col.min = 0, col.max = 2, scale.min = 0, scale.max = 100) +
    theme(axis.text.x = element_text(angle=60, vjust=1, hjust=1),
          plot.title = element_text(hjust = 0.5)) +
    labs(x = element_blank(), y = element_blank()) +
    ggtitle(celltypes_for_figures[i])
  if (!(plot %in% breaks)) {
    marker_plot <- marker_plot + theme(axis.text.y = element_blank())
  }

  negplotlist[[plot]] <- marker_plot
  plot <- plot + 1
  widths <- c(widths, length(genes))
}

#rough plot
#ggarrange(plotlist = negplotlist, common.legend = TRUE)

#format for when decided for final plot
# Rows are built first, then stacked top to bottom with a shared legend.
# Plot numbers are fixed by hand and depend on which cell types produced a plot.
negmarkerplot_all <- local({
  row1 <- negplotlist[[1]] + negplotlist[[2]] + negplotlist[[3]] + plot_layout(widths = c(.8,1.2,1))
  row2 <- negplotlist[[4]] + negplotlist[[5]] + negplotlist[[6]] + negplotlist[[10]] + plot_layout( widths = c(1,1,1,1))
  row3 <- negplotlist[[7]] + plot_layout( widths = c(1))
  row4 <- negplotlist[[11]] + negplotlist[[12]] + plot_layout( widths = c(2,1))
  row5 <- negplotlist[[13]] + negplotlist[[15]] + plot_layout( widths = c(1,2))
  row1 / row2 / row3 / row4 / row5 /
    negplotlist[[16]] / negplotlist[[17]] +
    plot_layout(guides = "collect")
})
negmarkerplot_all
#ggsave("~/Documents/DEGage_Testing/cdf_data/Heidegger/negmarkers_all.svg", plot = negmarkerplot_all, width =20 , height = 30 ,units ="in" )
#ggsave("~/Documents/DEGage_Testing/cdf_data/Heidegger/negmarkers_all.pdf", plot = negmarkerplot_all, width =20 , height = 30 ,units ="in" )

#same as posplot, just with selected genes
# WARNING: this overwrites each posmarkerlist entry with the selected subset,
# so the section cannot be re-run without rebuilding posmarkerlist first.

posplotlist <- list()
plot <- 1
widths <- c()

blacklist_genes <- c("MALAT1")
# Per cell type (same order as celltypes): positions to keep within that
# type's sorted, blacklist-filtered marker vector. c() keeps nothing.
posselect_list <- list(c(1,6:14), #bcell
                       c(1,2,5,6,8,12,13,14,16,18,21,22,23,28,29,30,31), #chondrocytes
                       c(21), #cmps
                       c(1:12), #dendritic
                       c(1,5,6,7,10), #endothelial
                       c(2,7,8,9,13), #epithelial
                       c(1, 7,10 ), #fibroblasts
                       c(), #hepatocytes
                       c(), #hsc
                       c(1:9, 11:50), #macrophages
                       c(2:6, 8:24, 26:32, 35, 36), #monocytes
                       c(), #nothing for neutrophils :(
                       c(2,3,9), #nk
                       c(), #preb
                       c(4,8,9,10,12,13,14,18,20), #smooth muscle
                       c(9:13, 55 ), #tcell
                       c(1,6,7,8,13, 16,19,20,23:29, 34, 36:42),#tissue stems
                       c()
                       )

# Plot numbers that keep their y-axis labels; all other plots hide them.
breaks <- c( 1, 5,8,9,12)
for (i in seq_along(posmarkerlist)) {
  # Same cleaning as the full plot, then keep only the hand-picked positions.
  genes <- posmarkerlist[[i]]
  genes <- genes[!(genes %in% blacklist_genes)]
  genes <- genes[order(genes, decreasing = FALSE)]
  genes <- genes[posselect_list[[i]]]
  posmarkerlist[[i]] <- genes

  # Nothing to draw: empty selection, or a single NA (placeholder or a
  # selected position past the end of the vector).
  if (length(genes) == 0 || (length(genes) == 1 && is.na(genes))) {
    next
  }

  marker_plot <- DotPlot(pos_seurat, features = genes, dot.min = 0, col.min = 0, col.max = 2, scale.min = 0, scale.max = 100) +
    theme(axis.text.x = element_text(angle=60, vjust=1, hjust=1),
          plot.title = element_text(hjust = 0.5)) +
    labs(x = element_blank(), y = element_blank()) +
    ggtitle(celltypes_for_figures[i])
  if (!(plot %in% breaks)) {
    marker_plot <- marker_plot + theme(axis.text.y = element_blank())
  }

  posplotlist[[plot]] <- marker_plot
  plot <- plot + 1
  widths <- c(widths, length(genes))
}

#rough plot
#ggarrange(plotlist = neg, common.legend = TRUE)

#format for when decided for final plot
# Rows are built first, then stacked top to bottom with a shared legend.
posmarkerplot_select <- local({
  row1 <- posplotlist[[1]] + posplotlist[[2]] + posplotlist[[3]] + posplotlist[[4]] + plot_layout(widths = c(1,1,.3,1))
  row2 <- posplotlist[[5]] + posplotlist[[6]] + posplotlist[[7]] + plot_layout( widths = c(1,1,1))
  row3 <- posplotlist[[8]]
  row4 <- posplotlist[[9]] + posplotlist[[10]] + posplotlist[[11]] + plot_layout( widths = c(2,.5,1))
  row5 <- posplotlist[[12]] + posplotlist[[13]] + plot_layout( widths = c(1,2))
  row1 / row2 / row3 / row4 / row5 +
    plot_layout(guides = "collect")
})
posmarkerplot_select

# Write the figure as both SVG and PDF at 15 x 23 inches.
ggsave("~/Documents/DEGage_Testing/cdf_data/Heidegger/posmarkers_select.svg", plot = posmarkerplot_select, width =15 , height = 23 ,units ="in" )
ggsave("~/Documents/DEGage_Testing/cdf_data/Heidegger/posmarkers_select.pdf", plot = posmarkerplot_select, width =15 , height = 23 ,units ="in" )




# Selected-gene version of the negative marker plots.
# WARNING: this overwrites each negmarkerlist entry with the selected subset,
# so the section cannot be re-run without rebuilding negmarkerlist first.
negplotlist <- list()
plot <- 1
widths <- c()

blacklist_genes <- c("MALAT1")
# Per cell type (same order as celltypes): positions to keep within that
# type's sorted, blacklist-filtered marker vector. c() keeps nothing.
negselect_list <- list(c(3:8), #bcell
                       c(6, 10:14, 16:19), #chondrocytes
                       c(2,12:14), #cmps
                       c(1), #dendritic
                       c(1, 3:7), #endothelial
                       c(), #epithelial
                       c(), #fibroblasts
                       c(), #hepatocytes
                       c(), #hsc
                       c(2,4,5,6,7,10,11), #macrophages
                       c(1:12), #monocytes
                       c(9,23,24), #neutrophils
                       c(3,4,5,8), #nk
                       c(), #preb
                       c(1:11, 13:28, 30, 32:40), #smooth muscle
                       c(5,6,18,38), #tcell
                       c(3,4,8:13, 15:24, 26:35, 40:53),#tissue stems,
                       c()
)

# Plot numbers that keep their y-axis labels; all other plots hide them.
breaks <- c( 1, 5,8,10,12)
for (i in seq_along(negmarkerlist)) {
  # Same cleaning as the full plot, then keep only the hand-picked positions.
  genes <- negmarkerlist[[i]]
  genes <- genes[!(genes %in% blacklist_genes)]
  genes <- genes[order(genes, decreasing = FALSE)]
  genes <- genes[negselect_list[[i]]]
  negmarkerlist[[i]] <- genes

  # Nothing to draw: empty selection, or a single NA (placeholder or a
  # selected position past the end of the vector).
  if (length(genes) == 0 || (length(genes) == 1 && is.na(genes))) {
    next
  }

  marker_plot <- DotPlot(neg_seurat, features = genes, dot.min = 0, col.min = 0, col.max = 2, scale.min = 0, scale.max = 100) +
    theme(axis.text.x = element_text(angle=60, vjust=1, hjust=1),
          plot.title = element_text(hjust = 0.5)) +
    labs(x = element_blank(), y = element_blank()) +
    ggtitle(celltypes_for_figures[i])
  if (!(plot %in% breaks)) {
    marker_plot <- marker_plot + theme(axis.text.y = element_blank())
  }

  negplotlist[[plot]] <- marker_plot
  plot <- plot + 1
  widths <- c(widths, length(genes))
}

#rough plot
#ggarrange(plotlist = neg, common.legend = TRUE)

#format for when decided for final plot
# The first row previously ended in a stray `c))`, a syntax error that stopped
# the whole script from parsing; it now closes with an explicit plot_layout().
# NOTE(review): equal widths are a neutral choice for row 1 - adjust if the
# figure needs different proportions.
# NOTE(review): row 4 lists negplotlist[[9]] twice and [[11]] is never used;
# left as written - confirm which plot was intended.
negmarkerplot_select <- (negplotlist[[1]] + negplotlist[[2]] + negplotlist[[3]] + negplotlist[[4]] + plot_layout(widths = c(1,1,1,1))) /
  (negplotlist[[5]] + negplotlist[[6]] + negplotlist[[7]]    + plot_layout( widths = c(1,1,1))) /
  (negplotlist[[10]]) /
  (negplotlist[[8]] + negplotlist[[9]] +negplotlist[[9]]  + plot_layout( widths = c(1,1,1))) /
  (negplotlist[[12]] + plot_layout( widths = c(1))) +
  plot_layout(guides = "collect")
negmarkerplot_select

# Write the figure as both SVG and PDF at 15 x 23 inches.
ggsave("~/Documents/DEGage_Testing/cdf_data/Heidegger/negmarkers_select.svg", plot = negmarkerplot_select, width =15 , height = 23 ,units ="in" )
ggsave("~/Documents/DEGage_Testing/cdf_data/Heidegger/negmarkers_select.pdf", plot = negmarkerplot_select, width =15 , height = 23 ,units ="in" )



 ## SUPPLEMENTARY TABLES #################################################################################################

# Joins the row names (genes) of a result table into one ";"-separated string,
# with spaces removed. Returns "" when the table has no rows.
collapse_genes <- function(result) {
  paste(gsub(" ", "", rownames(result)), collapse = ";")
}

# Builds a cell type x cell type table whose upper triangle (row i, column j
# with i < j) holds the DEG string for that pair; every other cell stays 0.
#   result_list - filtered result tables, indexed by position in `files`
#   files       - comparison file names
#   celltypes   - labels used for lookup and as row/column names
pairwise_deg_table <- function(result_list, files, celltypes) {
  n <- length(celltypes)
  out <- data.frame(matrix(0, ncol = n, nrow = n))
  colnames(out) <- celltypes
  rownames(out) <- celltypes
  for (i in seq_len(n - 1)) {
    for (j in (i + 1):n) {
      index <- which(grepl(celltypes[i], files) & grepl(celltypes[j], files))[1]
      out[i, j] <- collapse_genes(result_list[[index]])
    }
  }
  out
}

#pos cell type comparisons
pos_suppdf <- pairwise_deg_table(poslist, posfiles, celltypes)
write.csv(pos_suppdf, "~/Documents/DEGage_Testing/cdf_data/Heidegger/suppplementary//pos_celltype_comparisons_degs.csv")

#neg cell type comparisons
neg_suppdf <- pairwise_deg_table(neglist, negfiles, celltypes)
write.csv(neg_suppdf, "~/Documents/DEGage_Testing/cdf_data/Heidegger/suppplementary//neg_celltype_comparisons_degs.csv")

#posvneg supp table
# One row per cell type, built in a single step. Row names now run 1..n; the
# earlier placeholder-row approach left them starting at 2.
posvneg_suppdf <- data.frame(
  "Celltype" = celltypes_for_figures,
  "DEGs" = vapply(seq_along(celltypes),
                  function(i) collapse_genes(posvneglist[[i]]),
                  character(1))
)
write.csv(posvneg_suppdf, "~/Documents/DEGage_Testing/cdf_data/Heidegger/suppplementary/posvneg_degs.csv")



## ASSEMBLING FINAL FIGURE PANEL #####################################
library(cowplot)
library(magick)
library(extrafont)

# Register system fonts with extrafont (asks for confirmation interactively).
font_import(prompt = TRUE)

font <- "Arial"
fsize <- 10

# Shared text styling for every panel of the compiled figure: font family for
# axis text and titles, fixed size for tick labels on both axes.
panel_text_theme <- theme(axis.text = element_text(family = font),
                          axis.title = element_text(family = font),
                          axis.text.x = element_text(size = fsize),
                          axis.text.y = element_text(size = fsize))

posvneg_ndeg_barplot <- posvneg_ndeg_barplot + panel_text_theme
marker_violin <- marker_violin + panel_text_theme
ndeg_heatmap <- ndeg_heatmap + panel_text_theme
negplotlist[[1]] <- negplotlist[[1]] + panel_text_theme



# Pre-rendered images are wrapped as plot objects so they can sit in the panel.
tsne <- ggdraw() +
  draw_image("~/Documents/DEGage_stuff/DEGage_Testing/cdf_data/Heidegger/Final Tsne.png")
org <- ggdraw() +
  draw_image("~/Documents/DEGage_stuff/DEGage_Testing/cdf_data/Heidegger/heidegger organization.png")

#compiled_plot <- (org +tsne+ plot_layout(widths = c(1,2))) /
#(ndeg_heatmap) /
#(ggarrange(posvneg_ndeg_barplot, negplotlist[[1]], widths = c(1,1.5)))/
#ggarrange(marker_violin)
#compiled_plot

# Three stacked rows. The result must be assigned: the ggsave() calls below
# save `compiled_plot`, which was otherwise never defined.
compiled_plot <- ggarrange(org, tsne, posvneg_ndeg_barplot, ncol = 3, widths = c(1,2,1)) /
  ggarrange(ndeg_heatmap, negplotlist[[1]], widths = c(1.85,1.15)) /
  ggarrange(marker_violin)
compiled_plot

# Write the compiled figure as both SVG and PDF at 10 x 15.5 inches.
ggsave(plot = compiled_plot, filename = "~/Documents/DEGage_Testing/cdf_data/Heidegger/compiled.svg",
       units = "in",
       width = 10,
       height = 15.5)
ggsave(plot = compiled_plot, filename = "~/Documents/DEGage_Testing/cdf_data/Heidegger/compiled.pdf",
       units = "in",
       width = 10,
       height = 15.5)
