
library(dplyr)
library(org.Hs.eg.db)

## --- Check overlap between male correlated genes, blue module, and cardiomyopathy DEGs --- ##

# Inputs: four DESeq2 result tables (dcm / arrvcm, one per sex) and the
# gene-correlation table (table.s15).
dcm.male <- read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/cardiomyopathy.degs/dcm.male.deseq2.csv"
)
dcm.female <- read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/cardiomyopathy.degs/dcm.female.deseq2.csv"
)
arr.male <- read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/cardiomyopathy.degs/arrvcm.male.deseq2.csv"
)
arr.female <- read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/cardiomyopathy.degs/arrvcm.female.deseq2.csv"
)
male.corr.genes <- read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/paper/tables.and.files.for.pub/table.s15.mn.gene.correlations.csv"
)

# Split every DEG table into "up" / "down" gene ID vectors (column X) at
# padj < 0.05. Normal is the reference level, so the log2FC sign is the
# opposite of the label: negative log2FoldChange -> "up", positive -> "down".
# Rows containing any NA are dropped first, so the masks below are NA-free.
dcm.male <- na.omit(dcm.male)
dcm.male.up <- with(dcm.male, X[padj < 0.05 & log2FoldChange < 0])
dcm.male.down <- with(dcm.male, X[padj < 0.05 & log2FoldChange > 0])

dcm.female <- na.omit(dcm.female)
dcm.female.up <- with(dcm.female, X[padj < 0.05 & log2FoldChange < 0])
dcm.female.down <- with(dcm.female, X[padj < 0.05 & log2FoldChange > 0])

arr.male <- na.omit(arr.male)
arr.male.up <- with(arr.male, X[padj < 0.05 & log2FoldChange < 0])
arr.male.down <- with(arr.male, X[padj < 0.05 & log2FoldChange > 0])

arr.female <- na.omit(arr.female)
arr.female.up <- with(arr.female, X[padj < 0.05 & log2FoldChange < 0])
arr.female.down <- with(arr.female, X[padj < 0.05 & log2FoldChange > 0])

# Gene symbols of the two "Stronger Male Correlation" groups.
male.pos.corr.genes <- male.corr.genes$hgnc_symbol[
  male.corr.genes$group == "Stronger Male Correlation: Positive"
]
male.neg.corr.genes <- male.corr.genes$hgnc_symbol[
  male.corr.genes$group == "Stronger Male Correlation: Negative"
]

# DEGs shared by both sexes, per disease and direction.
dcm.common.up <- intersect(dcm.male.up, dcm.female.up)
dcm.common.down <- intersect(dcm.male.down, dcm.female.down)
arr.common.up <- intersect(arr.male.up, arr.female.up)
arr.common.down <- intersect(arr.male.down, arr.female.down)

# Shared DEGs that are also in the positive / negative correlation sets.
pos.corr.common.dcm.up <- intersect(dcm.common.up, male.pos.corr.genes)
pos.corr.common.dcm.down <- intersect(dcm.common.down, male.pos.corr.genes)
neg.corr.common.dcm.up <- intersect(dcm.common.up, male.neg.corr.genes)
neg.corr.common.dcm.down <- intersect(dcm.common.down, male.neg.corr.genes)
pos.corr.common.arr.up <- intersect(arr.common.up, male.pos.corr.genes)
pos.corr.common.arr.down <- intersect(arr.common.down, male.pos.corr.genes)
neg.corr.common.arr.up <- intersect(arr.common.up, male.neg.corr.genes)
neg.corr.common.arr.down <- intersect(arr.common.down, male.neg.corr.genes)

# The shared DEG lists themselves are not printed:
#print(dcm.common.up)
#print(dcm.common.down)
#print(arr.common.up)
#print(arr.common.down)

# Print the eight correlation overlaps, DCM first, then ARR.
invisible(lapply(
  list(
    pos.corr.common.dcm.up, pos.corr.common.dcm.down,
    neg.corr.common.dcm.up, neg.corr.common.dcm.down,
    pos.corr.common.arr.up, pos.corr.common.arr.down,
    neg.corr.common.arr.up, neg.corr.common.arr.down
  ),
  print
))

#now by sex
# Overlap of each sex-specific DEG set with the positive / negative
# correlation gene sets.

# male DEGs
pos.corr.dcm.up.male <- intersect(dcm.male.up, male.pos.corr.genes)
pos.corr.dcm.down.male <- intersect(dcm.male.down, male.pos.corr.genes)
neg.corr.dcm.up.male <- intersect(dcm.male.up, male.neg.corr.genes)
neg.corr.dcm.down.male <- intersect(dcm.male.down, male.neg.corr.genes)
pos.corr.arr.up.male <- intersect(arr.male.up, male.pos.corr.genes)
pos.corr.arr.down.male <- intersect(arr.male.down, male.pos.corr.genes)
neg.corr.arr.up.male <- intersect(arr.male.up, male.neg.corr.genes)
neg.corr.arr.down.male <- intersect(arr.male.down, male.neg.corr.genes)

# female DEGs
pos.corr.dcm.up.female <- intersect(dcm.female.up, male.pos.corr.genes)
pos.corr.dcm.down.female <- intersect(dcm.female.down, male.pos.corr.genes)
neg.corr.dcm.up.female <- intersect(dcm.female.up, male.neg.corr.genes)
neg.corr.dcm.down.female <- intersect(dcm.female.down, male.neg.corr.genes)
pos.corr.arr.up.female <- intersect(arr.female.up, male.pos.corr.genes)
pos.corr.arr.down.female <- intersect(arr.female.down, male.pos.corr.genes)
neg.corr.arr.up.female <- intersect(arr.female.up, male.neg.corr.genes)
neg.corr.arr.down.female <- intersect(arr.female.down, male.neg.corr.genes)

# male results
print(pos.corr.dcm.up.male)
print(pos.corr.dcm.down.male)
print(neg.corr.dcm.up.male)
print(neg.corr.dcm.down.male)
print(pos.corr.arr.up.male)
print(pos.corr.arr.down.male)
print(neg.corr.arr.up.male)
print(neg.corr.arr.down.male)

# female results (this group previously re-printed the male vectors, so the
# female overlaps computed above were never shown)
print(pos.corr.dcm.up.female)
print(pos.corr.dcm.down.female)
print(neg.corr.dcm.up.female)
print(neg.corr.dcm.down.female)
print(pos.corr.arr.up.female)
print(pos.corr.arr.down.female)
print(neg.corr.arr.up.female)
print(neg.corr.arr.down.female)

#now blue module

# Gene IDs listed in the `blue` column of the module file.
blue <- read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/GSE65830/blue.module.genes.csv"
)$blue

# Blue-module members among the shared up-regulated DEGs (DCM, ARR) ...
blue.common.dcm.up <- intersect(dcm.common.up, blue)
blue.common.arr.up <- intersect(arr.common.up, blue)
# ... and among the sex-specific DCM DEGs in either direction.
blue.male.dcm.up <- intersect(dcm.male.up, blue)
blue.male.dcm.down <- intersect(dcm.male.down, blue)
blue.female.dcm.up <- intersect(dcm.female.up, blue)
blue.female.dcm.down <- intersect(dcm.female.down, blue)

invisible(lapply(
  list(
    blue.common.dcm.up, blue.common.arr.up,
    blue.male.dcm.up, blue.male.dcm.down,
    blue.female.dcm.up, blue.female.dcm.down
  ),
  print
))


#now a function to summarize all of these results in one table

#' Label each requested gene as "Up", "Down" or NA from one DEG table.
#'
#' A gene gets a label only when it is present in `gene_list` and its adjusted
#' p-value is below 0.05. Negative log fold change is labelled "Up" and
#' anything else "Down" (the reference level is inverted in these tables).
#'
#' @param gene_list  Gene IDs of the DEG table.
#' @param logfc_list log2 fold changes, parallel to `gene_list`.
#' @param padj_list  Adjusted p-values, parallel to `gene_list`.
#' @param gene_names Gene IDs to look up.
#' @return Character vector the same length as `gene_names`; NA where the gene
#'   is absent, not significant, or has a missing padj / log fold change.
get_deg_direction <- function(gene_list, logfc_list, padj_list, gene_names) {
  # One vectorised lookup instead of a which() scan per gene. match() keeps
  # the first hit, so duplicated IDs in gene_list no longer feed a
  # length > 1 condition into `&&` (an error since R 4.3).
  idx <- match(gene_names, gene_list)
  # `==` never matched an NA gene name; keep that behaviour under match().
  idx[is.na(gene_names)] <- NA

  padj <- padj_list[idx]
  logfc <- logfc_list[idx]

  # Missing padj / logFC (or an absent gene) yields NA instead of an error.
  significant <- !is.na(padj) & !is.na(logfc) & padj < 0.05

  directions <- rep(NA_character_, length(gene_names))
  directions[significant] <- ifelse(logfc[significant] < 0, "Up", "Down")
  directions
}

#genes of interest
# Every gene ID present in the four NA-filtered DEG tables, the two
# correlation sets, or the blue module.
all_genes <- unique(c(
  dcm.male$X, dcm.female$X, arr.male$X, arr.female$X,
  male.pos.corr.genes, male.neg.corr.genes, blue
))

#initial table
# One row per gene. Columns are added in the order they appear in the CSV.
gene_summary <- data.frame(Gene = all_genes)
gene_summary$DEG_DCM_Male <- all_genes %in% with(dcm.male, X[padj < 0.05])
gene_summary$DEG_DCM_Female <- all_genes %in% with(dcm.female, X[padj < 0.05])
gene_summary$DEG_ARR_Male <- all_genes %in% with(arr.male, X[padj < 0.05])
gene_summary$DEG_ARR_Female <- all_genes %in% with(arr.female, X[padj < 0.05])
gene_summary$PosCorr <- all_genes %in% male.pos.corr.genes
gene_summary$NegCorr <- all_genes %in% male.neg.corr.genes
gene_summary$BlueModule <- all_genes %in% blue

#add "both sexes" DEGs
gene_summary$DEG_DCM_Both <- with(gene_summary, DEG_DCM_Male & DEG_DCM_Female)
gene_summary$DEG_ARR_Both <- with(gene_summary, DEG_ARR_Male & DEG_ARR_Female)

#directionality for columns
# "Up" / "Down" / NA per gene, taken from each DEG table in turn.
gene_summary$DCM_Male_Direction <- with(
  dcm.male, get_deg_direction(X, log2FoldChange, padj, gene_summary$Gene)
)
gene_summary$DCM_Female_Direction <- with(
  dcm.female, get_deg_direction(X, log2FoldChange, padj, gene_summary$Gene)
)
gene_summary$ARR_Male_Direction <- with(
  arr.male, get_deg_direction(X, log2FoldChange, padj, gene_summary$Gene)
)
gene_summary$ARR_Female_Direction <- with(
  arr.female, get_deg_direction(X, log2FoldChange, padj, gene_summary$Gene)
)

#filter to only relevant genes
# Keep a gene if it is a DEG in any table or belongs to any of the gene sets.
gene_summary_filtered <- gene_summary[
  with(
    gene_summary,
    DEG_DCM_Male | DEG_DCM_Female | DEG_ARR_Male | DEG_ARR_Female |
      PosCorr | NegCorr | BlueModule
  ),
]

write.csv(
  gene_summary_filtered,
  "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/paper.v2/tables/DEGs.overlap.full.list.csv"
)

#now lets print what we are interested in

#we have 6 scenarios
# Each query is a bare top-level expression that evaluates to the vector of
# matching gene IDs from gene_summary_filtered.

# 1. Downregulated in both sexes & negatively correlated (DCM + ARR)
with(gene_summary_filtered, Gene[
  NegCorr &
    DCM_Male_Direction == "Down" & DCM_Female_Direction == "Down" &
    ARR_Male_Direction == "Down" & ARR_Female_Direction == "Down" &
    DEG_DCM_Both & DEG_ARR_Both
])

# 2. Upregulated in both sexes & positively correlated (DCM + ARR)
with(gene_summary_filtered, Gene[
  PosCorr &
    DCM_Male_Direction == "Up" & DCM_Female_Direction == "Up" &
    ARR_Male_Direction == "Up" & ARR_Female_Direction == "Up" &
    DEG_DCM_Both & DEG_ARR_Both
])

# 3. DCM-only: Upregulated in both sexes
with(gene_summary_filtered, Gene[
  DCM_Male_Direction == "Up" & DCM_Female_Direction == "Up" & DEG_DCM_Both
])

# 4. DCM-only: Downregulated in both sexes
with(gene_summary_filtered, Gene[
  DCM_Male_Direction == "Down" & DCM_Female_Direction == "Down" & DEG_DCM_Both
])

# 5. ARR-only: Upregulated in both sexes
with(gene_summary_filtered, Gene[
  ARR_Male_Direction == "Up" & ARR_Female_Direction == "Up" & DEG_ARR_Both
])

# 6. ARR-only: Downregulated in both sexes
with(gene_summary_filtered, Gene[
  ARR_Male_Direction == "Down" & ARR_Female_Direction == "Down" & DEG_ARR_Both
])


#we also want to compare the # SPC genes that are significantly upregulated in male and female disease types
# NOTE(review): the counts below use only the direction and DEG flags; no
# SPC-specific gene set is applied here. Confirm that is intended.

#male up ARR
with(gene_summary_filtered, length(Gene[ARR_Male_Direction == "Up" & DEG_ARR_Male]))

#female up ARR
with(gene_summary_filtered, length(Gene[ARR_Female_Direction == "Up" & DEG_ARR_Female]))

#male up DCM
with(gene_summary_filtered, length(Gene[DCM_Male_Direction == "Up" & DEG_DCM_Male]))

#female up DCM
with(gene_summary_filtered, length(Gene[DCM_Female_Direction == "Up" & DEG_DCM_Female]))


## --- Check overlap between JASPAR (region of interest) and pos corr genes, skeletal muscle nm vs m schwann degs, and heart nm vs m schwann degs

# TF names from the JASPAR region-of-interest table.
jaspar <- read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/paper.v2/tables/jaspar.region.of.interest.csv"
)$TFName

# Entries containing "::" are split into their component names.
fusion.parts <- grep("::", jaspar, value = TRUE) %>%
  strsplit("::") %>%
  unlist() %>%
  unique()

# nm vs m schwann DEGs, skeletal muscle. In these tables "up" is
# log2FoldChange > 0 (the cardiomyopathy tables use the opposite sign).
nm.vs.m.sma <- na.omit(read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/paper.v2/tables/all.nmsc.vs.msc.skeletal.muscle copy.csv"
))
nm.vs.m.sma.up <- with(nm.vs.m.sma, X[padj < 0.05 & log2FoldChange > 0])
nm.vs.m.sma.down <- with(nm.vs.m.sma, X[padj < 0.05 & log2FoldChange < 0])

# nm vs m schwann DEGs, heart.
nm.vs.m.heart <- na.omit(read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/paper.v2/tables/all.nmsc.vs.msc.heart copy.csv"
))
nm.vs.m.heart.up <- with(nm.vs.m.heart, X[padj < 0.05 & log2FoldChange > 0])
nm.vs.m.heart.down <- with(nm.vs.m.heart, X[padj < 0.05 & log2FoldChange < 0])


# JASPAR names (whole entries and fusion components) found in each gene set.
poss.corr.jaspar <- intersect(jaspar, male.pos.corr.genes)
poss.corr.jaspar.fusion <- intersect(fusion.parts, male.pos.corr.genes)
heart.up.jaspar <- intersect(jaspar, nm.vs.m.heart.up)
heart.up.jaspar.fusion <- intersect(fusion.parts, nm.vs.m.heart.up)
skel.muscle.up.jaspar <- intersect(jaspar, nm.vs.m.sma.up)
skel.muscle.up.jaspar.fusion <- intersect(fusion.parts, nm.vs.m.sma.up)

invisible(lapply(
  list(
    poss.corr.jaspar, poss.corr.jaspar.fusion,
    heart.up.jaspar, heart.up.jaspar.fusion,
    skel.muscle.up.jaspar, skel.muscle.up.jaspar.fusion
  ),
  print
))

# Full DESeq2 rows for the overlapping TFs.
subset(nm.vs.m.sma, X %in% skel.muscle.up.jaspar)
subset(nm.vs.m.sma, X %in% skel.muscle.up.jaspar.fusion)
subset(nm.vs.m.heart, X %in% heart.up.jaspar)
subset(nm.vs.m.heart, X %in% heart.up.jaspar.fusion)


#now check overlap between up and downregulated nm schwann cell genes with blue module
# For each DEG set: print the blue-module genes it contains, then the
# percentage of that DEG set they represent.

# skeletal muscle
print(intersect(blue, nm.vs.m.sma.up))
print(paste(length(intersect(blue, nm.vs.m.sma.up))/length(nm.vs.m.sma.up)*100, "% of upregulated skeletal muscle genes overlap with blue module genes"))
print(intersect(blue, nm.vs.m.sma.down))
print(paste(length(intersect(blue, nm.vs.m.sma.down))/length(nm.vs.m.sma.down)*100, "% of downregulated skeletal muscle genes overlap with blue module genes"))

# heart (these two messages were previously labelled "skeletal muscle")
print(intersect(blue, nm.vs.m.heart.up))
print(paste(length(intersect(blue, nm.vs.m.heart.up))/length(nm.vs.m.heart.up)*100, "% of upregulated heart genes overlap with blue module genes"))
print(intersect(blue, nm.vs.m.heart.down))
print(paste(length(intersect(blue, nm.vs.m.heart.down))/length(nm.vs.m.heart.down)*100, "% of downregulated heart genes overlap with blue module genes"))


## --- Next we check how many heart and skeletal muscle DEGs overlap with pos and neg corr genes --- ##

# Up-regulated DEGs are crossed with the positive correlation set and
# down-regulated DEGs with the negative correlation set.
poss.corr.heart.up <- intersect(nm.vs.m.heart.up, male.pos.corr.genes)
neg.corr.heart.down <- intersect(nm.vs.m.heart.down, male.neg.corr.genes)
poss.corr.sma.up <- intersect(nm.vs.m.sma.up, male.pos.corr.genes)
neg.corr.sma.down <- intersect(nm.vs.m.sma.down, male.neg.corr.genes)

# Heart first, then skeletal muscle.
invisible(lapply(
  list(
    poss.corr.heart.up, neg.corr.heart.down,
    poss.corr.sma.up, neg.corr.sma.down
  ),
  print
))


## --- Lastly, we want to know how many genes in blue module, pos corr, and neg corr are ligands vs receptors

# Ligand-receptor pair table.
lr.pairs <- read.csv(
  file = "/Users/kevinoleary/Dropbox (EinsteinMed)/Result_from_KevinO/neuron.xist/cellchat.lr.pairs.csv"
)

#get ligands and receptors
# Distinct values of the ligand and receptor columns.
ligands <- lr.pairs$ligand %>% unique()
receptors <- lr.pairs$receptor %>% unique()

#function to annotate genes

#' Classify genes as ligand, receptor, both, or neither.
#'
#' @param gene_list    Vector of gene names to classify.
#' @param ligand_set   Known ligand names. Defaults to the script-level
#'   `ligands` vector, so existing one-argument calls behave as before.
#' @param receptor_set Known receptor names. Defaults to the script-level
#'   `receptors` vector.
#' @return data.frame with columns `gene` (the input) and `role`, one of
#'   "both", "ligand", "receptor" or "neither".
annotate_genes <- function(gene_list, ligand_set = ligands, receptor_set = receptors) {
  is_ligand <- gene_list %in% ligand_set
  is_receptor <- gene_list %in% receptor_set

  # Start from the fallback and overwrite; "both" is assigned last so it
  # takes precedence over the single-role labels.
  role <- rep("neither", length(gene_list))
  role[is_receptor] <- "receptor"
  role[is_ligand] <- "ligand"
  role[is_ligand & is_receptor] <- "both"

  data.frame(gene = gene_list, role = role)
}

#apply to gene lists
# Annotate each correlation / module gene set and tabulate its role counts.
lr.male.pos.corr.genes <- annotate_genes(male.pos.corr.genes)
table(lr.male.pos.corr.genes[["role"]])

lr.male.neg.corr.genes <- annotate_genes(male.neg.corr.genes)
table(lr.male.neg.corr.genes[["role"]])

lr.blue <- annotate_genes(blue)
table(lr.blue[["role"]])

# The schwann-cell DEG sets are annotated but not tabulated.
lr.nm.vs.m.sma.up <- annotate_genes(nm.vs.m.sma.up)
lr.nm.vs.m.sma.down <- annotate_genes(nm.vs.m.sma.down)
lr.nm.vs.m.heart.up <- annotate_genes(nm.vs.m.heart.up)
lr.nm.vs.m.heart.down <- annotate_genes(nm.vs.m.heart.down)

#display ligands and receptors

#' Write the ligand and receptor genes of an annotated table to the console.
#'
#' Genes with role "both" are listed on both lines.
#'
#' @param annotated_df data.frame with `gene` and `role` columns.
#' @param label Text placed in front of each output line.
print_gene_roles <- function(annotated_df, label) {
  gene_col <- annotated_df[["gene"]]
  role_col <- annotated_df[["role"]]

  ligand_genes <- gene_col[role_col %in% c("ligand", "both")]
  receptor_genes <- gene_col[role_col %in% c("receptor", "both")]

  cat(paste0(label, " ligands: ", paste(ligand_genes, collapse = ", "), "\n"))
  cat(paste0(label, " receptors: ", paste(receptor_genes, collapse = ", "), "\n\n"))
}

# Report ligands / receptors for every annotated gene set, pairing each table
# with its display label.
invisible(Map(
  print_gene_roles,
  list(
    lr.male.pos.corr.genes,
    lr.male.neg.corr.genes,
    lr.blue,
    lr.nm.vs.m.sma.up,
    lr.nm.vs.m.sma.down,
    lr.nm.vs.m.heart.up,
    lr.nm.vs.m.heart.down
  ),
  c(
    "male pos corr",
    "male neg corr",
    "blue",
    "sma up",
    "sma down",
    "heart up",
    "heart down"
  )
))


