library(clusterProfiler)
library(org.Mm.eg.db)

# mus_gene <- read.csv("data/extra_files_for_DE/MusAssignIDs.csv", sep = "\t")
# head(mus_gene)

# entrezID <- select(org.Mm.eg.db, keys = DE_total$Gene, columns = "ENTREZID", keytype = "SYMBOL") 

# Count table: the first column of the file becomes the row names, the
# remaining columns are samples. Author's note on dimensions: 15796 x 125.
# NOTE(review): the path has no separator after "data" -- confirm that
# "datalocalandonline_..." really is the intended file name.
Alltable_onl <- read.csv("datalocalandonline_cds_counts_2025_final.tsv",
                         header = TRUE, sep = "\t", row.names = 1)

# Fix the species spelling (kollmanspergeri -> kollmannspergeri) in the
# column names of samples SHPC43, SHPC74, SHPC75 and SHPC76.
colnames(Alltable_onl) <- gsub(
  "loc_cds_SHPC(43|74|75|76)_Mastomys_kollmanspergeri",
  "loc_cds_SHPC\\1_Mastomys_kollmannspergeri",
  colnames(Alltable_onl)
)

# Relabel SHPC46 from Arvicanthis niloticus (lmo) to Mastomys natalensis,
# even if the sample is not used in the expression analysis.
colnames(Alltable_onl) <- gsub("SHPC46_Arvicanthis_niloticus_lmo",
                               "SHPC46_Mastomys_natalensis",
                               colnames(Alltable_onl))

# Sample sheet and the four analysis subsets.
# Each subset keeps the rows whose flag column equals "yes"; rows where the
# flag is NA are left out.
coldata <- read.csv("coldata_July2025.tsv", sep = "\t")

coldata_tot <- coldata[coldata$total_set %in% "yes", ]
coldata_anc <- coldata[coldata$ancient_set %in% "yes", ]
coldata_rec <- coldata[coldata$recent_set %in% "yes", ]
coldata_mur <- coldata[coldata$muridae_set %in% "yes", ]


################################################################################
# DATA PREPARATION
##################################

# Total dataset
# Background (gene universe) for the total set: genes whose rounded count is
# non-missing and non-zero in every sample listed in coldata_tot$ID.
All_genes_tot <- Alltable_onl[, colnames(Alltable_onl) %in% coldata_tot$ID,
                              drop = FALSE]
if (ncol(All_genes_tot) == 0) {
  stop("No column of Alltable_onl matches coldata_tot$ID", call. = FALSE)
}

# Remove mitochondrial genes (Co1, Co2, Cytb, Nd1, Nd2, Nd5, Nd6).
# The pattern is anchored so that only names starting with "mt-" are dropped,
# not any name that merely contains "mt-".
mt_genes <- grep(pattern = "^mt-", rownames(All_genes_tot))
Alltableok <- if (length(mt_genes) > 0) {
  All_genes_tot[-mt_genes, , drop = FALSE]
} else {
  All_genes_tot
}

# complete.cases() already removes every row containing an NA, so no NA
# replacement is needed afterwards.
Alltableok1 <- round(Alltableok[complete.cases(Alltableok), , drop = FALSE])

# n = number of samples with a zero count, per gene; keep genes with none.
n <- rowSums(Alltableok1 == 0)
back_tot1 <- Alltableok1[n == 0, , drop = FALSE]

# Map gene symbols to Entrez IDs. The call is namespace-qualified so that it
# keeps working when another attached package (e.g. dplyr) masks select().
# Symbols without a match get ENTREZID = NA.
back_tot <- AnnotationDbi::select(org.Mm.eg.db,
                                  keys = row.names(back_tot1),
                                  columns = "ENTREZID",
                                  keytype = "SYMBOL")
head(back_tot)

# DE gene list for the total set: map gene symbols to Entrez IDs and run the
# GO enrichment against the total-set background (back_tot).
# Row names of the DE table are used as gene symbols -- assumes the file was
# written with row names so that read.csv() picks them up; all rows are
# treated as DE genes (presumably pre-filtered at the 0.1 threshold; confirm).
DE_total <- read.csv(
  "dataset_tot_final/Table_DE_results_cond_season_thres01.tsv",
  sep = "\t"
)
head(DE_total)

# Namespace-qualified so that a masking select() (e.g. dplyr) cannot break it.
DE_tot_id <- AnnotationDbi::select(org.Mm.eg.db,
                                   keys = row.names(DE_total),
                                   columns = "ENTREZID",
                                   keytype = "SYMBOL")
head(DE_tot_id)
sum(is.na(DE_tot_id$ENTREZID))  # number of symbols without an Entrez ID

# Pass only mapped, de-duplicated IDs instead of relying on enrichGO() to
# ignore NA entries and repeated IDs from 1:many mappings.
DE_tot_genes <- unique(DE_tot_id$ENTREZID[!is.na(DE_tot_id$ENTREZID)])
DE_tot_universe <- unique(back_tot$ENTREZID[!is.na(back_tot$ENTREZID)])

DE_tot_ego <- enrichGO(gene          = DE_tot_genes,
                       universe      = DE_tot_universe,
                       OrgDb         = org.Mm.eg.db,
                       ont           = "ALL",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.05,
                       qvalueCutoff  = 0.05,
                       readable      = TRUE)
head(DE_tot_ego)

# Make sure the output directory exists before writing the result table.
dir.create("Enrichment", showWarnings = FALSE, recursive = TRUE)
write.table(as.data.frame(DE_tot_ego),
            file = "Enrichment/GO_clusterProfiler_DE_total.tsv",
            sep = "\t", quote = FALSE)


# WGCNA gene list for the total set: keep only genes assigned to the selected
# modules, map them to Entrez IDs and run the GO enrichment on the pooled list.
WG_total <- read.csv(
  "dataset_tot_WGCNA_q50_power5_0910/Table_gene_modules_q50.txt",
  sep = "\t"
)
unique(WG_total$colors)
list_tot_mod <- c("green", "blue", "purple", "tan", "turquoise", "yellow",
                  "pink")

# %in% never returns NA, so it can index the rows directly.
WG_tot_sig <- WG_total[WG_total$colors %in% list_tot_mod, ]

head(WG_tot_sig)
# Namespace-qualified so that a masking select() (e.g. dplyr) cannot break it;
# as.character() guards against gene_id having been read as a factor.
WG_tot_sig_id <- AnnotationDbi::select(org.Mm.eg.db,
                                       keys = as.character(WG_tot_sig$gene_id),
                                       columns = "ENTREZID",
                                       keytype = "SYMBOL")

# Pass only mapped, de-duplicated IDs instead of relying on enrichGO() to
# ignore NA entries and repeated IDs from 1:many mappings.
# NOTE(review): the universe is the expressed-gene background back_tot;
# confirm it matches the gene set that entered WGCNA.
WG_tot_genes <- unique(WG_tot_sig_id$ENTREZID[!is.na(WG_tot_sig_id$ENTREZID)])
WG_tot_universe <- unique(back_tot$ENTREZID[!is.na(back_tot$ENTREZID)])

WG_tot_ego <- enrichGO(gene          = WG_tot_genes,
                       universe      = WG_tot_universe,
                       OrgDb         = org.Mm.eg.db,
                       ont           = "ALL",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.05,
                       qvalueCutoff  = 0.05,
                       readable      = TRUE)
head(WG_tot_ego)
dim(WG_tot_ego)

# Make sure the output directory exists before writing the result table.
dir.create("Enrichment", showWarnings = FALSE, recursive = TRUE)
write.table(as.data.frame(WG_tot_ego),
            file = "Enrichment/GO_clusterProfiler_WGCNA_total.tsv",
            sep = "\t", quote = FALSE)



##################################
# Ancient dataset
# Background (gene universe) for the ancient set: genes whose rounded count is
# non-missing and non-zero in every sample listed in coldata_anc$ID.
All_genes_anc <- Alltable_onl[, colnames(Alltable_onl) %in% coldata_anc$ID,
                              drop = FALSE]
if (ncol(All_genes_anc) == 0) {
  stop("No column of Alltable_onl matches coldata_anc$ID", call. = FALSE)
}

# Remove mitochondrial genes (Co1, Co2, Cytb, Nd1, Nd2, Nd5, Nd6).
# The pattern is anchored so that only names starting with "mt-" are dropped,
# not any name that merely contains "mt-".
mt_genes <- grep(pattern = "^mt-", rownames(All_genes_anc))
Alltableok <- if (length(mt_genes) > 0) {
  All_genes_anc[-mt_genes, , drop = FALSE]
} else {
  All_genes_anc
}

# complete.cases() already removes every row containing an NA, so no NA
# replacement is needed afterwards.
Alltableok1 <- round(Alltableok[complete.cases(Alltableok), , drop = FALSE])

# n = number of samples with a zero count, per gene; keep genes with none.
n <- rowSums(Alltableok1 == 0)
back_anc1 <- Alltableok1[n == 0, , drop = FALSE]

# Map gene symbols to Entrez IDs. The call is namespace-qualified so that it
# keeps working when another attached package (e.g. dplyr) masks select().
# Symbols without a match get ENTREZID = NA.
back_anc <- AnnotationDbi::select(org.Mm.eg.db,
                                  keys = row.names(back_anc1),
                                  columns = "ENTREZID",
                                  keytype = "SYMBOL")

# DE gene list for the ancient set: map gene symbols to Entrez IDs and run the
# GO enrichment against the ancient-set background (back_anc).
# Row names of the DE table are used as gene symbols -- assumes the file was
# written with row names so that read.csv() picks them up; all rows are
# treated as DE genes (presumably pre-filtered at the 0.1 threshold; confirm).
DE_ancient <- read.csv(
  "dataset_ancient_final/Table_DE_results_cond_season_thres01.tsv",
  sep = "\t"
)
head(DE_ancient)

# Namespace-qualified so that a masking select() (e.g. dplyr) cannot break it.
DE_anc_id <- AnnotationDbi::select(org.Mm.eg.db,
                                   keys = row.names(DE_ancient),
                                   columns = "ENTREZID",
                                   keytype = "SYMBOL")
head(DE_anc_id)
sum(is.na(DE_anc_id$ENTREZID))  # number of symbols without an Entrez ID

# Pass only mapped, de-duplicated IDs instead of relying on enrichGO() to
# ignore NA entries and repeated IDs from 1:many mappings.
DE_anc_genes <- unique(DE_anc_id$ENTREZID[!is.na(DE_anc_id$ENTREZID)])
DE_anc_universe <- unique(back_anc$ENTREZID[!is.na(back_anc$ENTREZID)])

DE_anc_ego <- enrichGO(gene          = DE_anc_genes,
                       universe      = DE_anc_universe,
                       OrgDb         = org.Mm.eg.db,
                       ont           = "ALL",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.05,
                       qvalueCutoff  = 0.05,
                       readable      = TRUE)
head(DE_anc_ego)

# Make sure the output directory exists before writing the result table.
dir.create("Enrichment", showWarnings = FALSE, recursive = TRUE)
write.table(as.data.frame(DE_anc_ego),
            file = "Enrichment/GO_clusterProfiler_DE_ancient.tsv",
            sep = "\t", quote = FALSE)

# WGCNA gene list for the ancient set: keep only genes assigned to the
# selected modules, map them to Entrez IDs and run the GO enrichment on the
# pooled list.
WG_ancient <- read.csv(
  "dataset_ancient_WGCNA_q50_power20_0910/Table_gene_modules_q50.txt",
  sep = "\t"
)
unique(WG_ancient$colors)
list_anc_mod <- c("yellow", "tan", "brown", "black", "magenta", "green",
                  "pink")

# %in% never returns NA, so it can index the rows directly.
WG_anc_sig <- WG_ancient[WG_ancient$colors %in% list_anc_mod, ]

# Only the first rows are shown; the former bare `WG_anc_sig` statement
# printed the entire table to the console.
head(WG_anc_sig)
# Namespace-qualified so that a masking select() (e.g. dplyr) cannot break it;
# as.character() guards against gene_id having been read as a factor.
WG_anc_sig_id <- AnnotationDbi::select(org.Mm.eg.db,
                                       keys = as.character(WG_anc_sig$gene_id),
                                       columns = "ENTREZID",
                                       keytype = "SYMBOL")

# Pass only mapped, de-duplicated IDs instead of relying on enrichGO() to
# ignore NA entries and repeated IDs from 1:many mappings.
# NOTE(review): the universe is the expressed-gene background back_anc;
# confirm it matches the gene set that entered WGCNA.
WG_anc_genes <- unique(WG_anc_sig_id$ENTREZID[!is.na(WG_anc_sig_id$ENTREZID)])
WG_anc_universe <- unique(back_anc$ENTREZID[!is.na(back_anc$ENTREZID)])

WG_anc_ego <- enrichGO(gene          = WG_anc_genes,
                       universe      = WG_anc_universe,
                       OrgDb         = org.Mm.eg.db,
                       ont           = "ALL",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.05,
                       qvalueCutoff  = 0.05,
                       readable      = TRUE)
head(WG_anc_ego)
dim(WG_anc_ego)

# Make sure the output directory exists before writing the result table.
dir.create("Enrichment", showWarnings = FALSE, recursive = TRUE)
write.table(as.data.frame(WG_anc_ego),
            file = "Enrichment/GO_clusterProfiler_WGCNA_ancient.tsv",
            sep = "\t", quote = FALSE)

##################################
# Recent dataset
# Background (gene universe) for the recent set: genes whose rounded count is
# non-missing and non-zero in every sample listed in coldata_rec$ID.
All_genes_rec <- Alltable_onl[, colnames(Alltable_onl) %in% coldata_rec$ID,
                              drop = FALSE]
if (ncol(All_genes_rec) == 0) {
  stop("No column of Alltable_onl matches coldata_rec$ID", call. = FALSE)
}

# Remove mitochondrial genes (Co1, Co2, Cytb, Nd1, Nd2, Nd5, Nd6).
# The pattern is anchored so that only names starting with "mt-" are dropped,
# not any name that merely contains "mt-".
mt_genes <- grep(pattern = "^mt-", rownames(All_genes_rec))
Alltableok <- if (length(mt_genes) > 0) {
  All_genes_rec[-mt_genes, , drop = FALSE]
} else {
  All_genes_rec
}

# complete.cases() already removes every row containing an NA, so no NA
# replacement is needed afterwards.
Alltableok1 <- round(Alltableok[complete.cases(Alltableok), , drop = FALSE])

# n = number of samples with a zero count, per gene; keep genes with none.
n <- rowSums(Alltableok1 == 0)
back_rec1 <- Alltableok1[n == 0, , drop = FALSE]

# Map gene symbols to Entrez IDs. The call is namespace-qualified so that it
# keeps working when another attached package (e.g. dplyr) masks select().
# Symbols without a match get ENTREZID = NA.
back_rec <- AnnotationDbi::select(org.Mm.eg.db,
                                  keys = row.names(back_rec1),
                                  columns = "ENTREZID",
                                  keytype = "SYMBOL")

# DE gene list for the recent set: map gene symbols to Entrez IDs and run the
# GO enrichment against the recent-set background (back_rec).
# Row names of the DE table are used as gene symbols -- assumes the file was
# written with row names so that read.csv() picks them up; all rows are
# treated as DE genes (presumably pre-filtered at the 0.1 threshold; confirm).
DE_recent <- read.csv(
  "dataset_recent_final/Table_DE_results_cond_season_thres01.tsv",
  sep = "\t"
)
head(DE_recent)

# Namespace-qualified so that a masking select() (e.g. dplyr) cannot break it.
DE_rec_id <- AnnotationDbi::select(org.Mm.eg.db,
                                   keys = row.names(DE_recent),
                                   columns = "ENTREZID",
                                   keytype = "SYMBOL")
sum(is.na(DE_rec_id$ENTREZID))  # number of symbols without an Entrez ID

# Pass only mapped, de-duplicated IDs instead of relying on enrichGO() to
# ignore NA entries and repeated IDs from 1:many mappings.
DE_rec_genes <- unique(DE_rec_id$ENTREZID[!is.na(DE_rec_id$ENTREZID)])
DE_rec_universe <- unique(back_rec$ENTREZID[!is.na(back_rec$ENTREZID)])

DE_rec_ego <- enrichGO(gene          = DE_rec_genes,
                       universe      = DE_rec_universe,
                       OrgDb         = org.Mm.eg.db,
                       ont           = "ALL",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.05,
                       qvalueCutoff  = 0.05,
                       readable      = TRUE)
head(DE_rec_ego)

# Make sure the output directory exists before writing the result table.
dir.create("Enrichment", showWarnings = FALSE, recursive = TRUE)
write.table(as.data.frame(DE_rec_ego),
            file = "Enrichment/GO_clusterProfiler_DE_recent.tsv",
            sep = "\t", quote = FALSE)


# WGCNA gene list for the recent set: keep only genes assigned to the selected
# modules, map them to Entrez IDs and run the GO enrichment on the pooled list.
WG_recent <- read.csv(
  "dataset_recent_WGCNA_q50_power8_0910/Table_gene_modules_q50.txt",
  sep = "\t"
)
list_rec_mod <- c("brown", "pink", "salmon")

# %in% never returns NA, so it can index the rows directly.
WG_rec_sig <- WG_recent[WG_recent$colors %in% list_rec_mod, ]

head(WG_rec_sig)
# Namespace-qualified so that a masking select() (e.g. dplyr) cannot break it;
# as.character() guards against gene_id having been read as a factor.
WG_rec_sig_id <- AnnotationDbi::select(org.Mm.eg.db,
                                       keys = as.character(WG_rec_sig$gene_id),
                                       columns = "ENTREZID",
                                       keytype = "SYMBOL")

# Pass only mapped, de-duplicated IDs instead of relying on enrichGO() to
# ignore NA entries and repeated IDs from 1:many mappings.
# NOTE(review): the universe is the expressed-gene background back_rec;
# confirm it matches the gene set that entered WGCNA.
WG_rec_genes <- unique(WG_rec_sig_id$ENTREZID[!is.na(WG_rec_sig_id$ENTREZID)])
WG_rec_universe <- unique(back_rec$ENTREZID[!is.na(back_rec$ENTREZID)])

WG_rec_ego <- enrichGO(gene          = WG_rec_genes,
                       universe      = WG_rec_universe,
                       OrgDb         = org.Mm.eg.db,
                       ont           = "ALL",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.05,
                       qvalueCutoff  = 0.05,
                       readable      = TRUE)
head(WG_rec_ego)

# Make sure the output directory exists before writing the result table.
dir.create("Enrichment", showWarnings = FALSE, recursive = TRUE)
write.table(as.data.frame(WG_rec_ego),
            file = "Enrichment/GO_clusterProfiler_WGCNA_recent.tsv",
            sep = "\t", quote = FALSE)


##################################
# Murinae dataset (samples flagged through the muridae_set column of coldata)
# Background (gene universe) for this set: genes whose rounded count is
# non-missing and non-zero in every sample listed in coldata_mur$ID.
All_genes_mur <- Alltable_onl[, colnames(Alltable_onl) %in% coldata_mur$ID,
                              drop = FALSE]
if (ncol(All_genes_mur) == 0) {
  stop("No column of Alltable_onl matches coldata_mur$ID", call. = FALSE)
}

# Remove mitochondrial genes (Co1, Co2, Cytb, Nd1, Nd2, Nd5, Nd6).
# The pattern is anchored so that only names starting with "mt-" are dropped,
# not any name that merely contains "mt-".
mt_genes <- grep(pattern = "^mt-", rownames(All_genes_mur))
Alltableok <- if (length(mt_genes) > 0) {
  All_genes_mur[-mt_genes, , drop = FALSE]
} else {
  All_genes_mur
}

# complete.cases() already removes every row containing an NA, so no NA
# replacement is needed afterwards.
Alltableok1 <- round(Alltableok[complete.cases(Alltableok), , drop = FALSE])

# n = number of samples with a zero count, per gene; keep genes with none.
n <- rowSums(Alltableok1 == 0)
back_mur1 <- Alltableok1[n == 0, , drop = FALSE]

# Map gene symbols to Entrez IDs. The call is namespace-qualified so that it
# keeps working when another attached package (e.g. dplyr) masks select().
# Symbols without a match get ENTREZID = NA.
back_mur <- AnnotationDbi::select(org.Mm.eg.db,
                                  keys = row.names(back_mur1),
                                  columns = "ENTREZID",
                                  keytype = "SYMBOL")

# DE gene list for the murinae set: map gene symbols to Entrez IDs and run the
# GO enrichment against the matching background (back_mur).
# Row names of the DE table are used as gene symbols -- assumes the file was
# written with row names so that read.csv() picks them up; all rows are
# treated as DE genes (presumably pre-filtered at the 0.1 threshold; confirm).
DE_mur <- read.csv(
  "dataset_muridae_final/Table_DE_results_cond_season_thres01.tsv",
  sep = "\t"
)

# Namespace-qualified so that a masking select() (e.g. dplyr) cannot break it.
DE_mur_id <- AnnotationDbi::select(org.Mm.eg.db,
                                   keys = row.names(DE_mur),
                                   columns = "ENTREZID",
                                   keytype = "SYMBOL")
sum(is.na(DE_mur_id$ENTREZID))  # number of symbols without an Entrez ID

# Pass only mapped, de-duplicated IDs instead of relying on enrichGO() to
# ignore NA entries and repeated IDs from 1:many mappings.
DE_mur_genes <- unique(DE_mur_id$ENTREZID[!is.na(DE_mur_id$ENTREZID)])
DE_mur_universe <- unique(back_mur$ENTREZID[!is.na(back_mur$ENTREZID)])

DE_mur_ego <- enrichGO(gene          = DE_mur_genes,
                       universe      = DE_mur_universe,
                       OrgDb         = org.Mm.eg.db,
                       ont           = "ALL",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.05,
                       qvalueCutoff  = 0.05,
                       readable      = TRUE)
head(DE_mur_ego)

# Make sure the output directory exists before writing the result table.
dir.create("Enrichment", showWarnings = FALSE, recursive = TRUE)
write.table(as.data.frame(DE_mur_ego),
            file = "Enrichment/GO_clusterProfiler_DE_murinae.tsv",
            sep = "\t", quote = FALSE)

# WGCNA gene list for the murinae set: keep only genes assigned to the
# selected modules, map them to Entrez IDs and run the GO enrichment on the
# pooled list.
WG_murinae <- read.csv(
  "dataset_muridae_WGCNA_q50_power20_0910/Table_gene_modules_q50.txt",
  sep = "\t"
)
list_mur_mod <- c("greenyellow", "turquoise", "green", "lightcyan", "tan",
                  "blue", "red", "magenta", "pink", "yellow")

# %in% never returns NA, so it can index the rows directly.
WG_mur_sig <- WG_murinae[WG_murinae$colors %in% list_mur_mod, ]

head(WG_mur_sig)

# Namespace-qualified so that a masking select() (e.g. dplyr) cannot break it;
# as.character() guards against gene_id having been read as a factor.
WG_mur_sig_id <- AnnotationDbi::select(org.Mm.eg.db,
                                       keys = as.character(WG_mur_sig$gene_id),
                                       columns = "ENTREZID",
                                       keytype = "SYMBOL")

# Pass only mapped, de-duplicated IDs instead of relying on enrichGO() to
# ignore NA entries and repeated IDs from 1:many mappings.
# NOTE(review): the universe is the expressed-gene background back_mur;
# confirm it matches the gene set that entered WGCNA.
WG_mur_genes <- unique(WG_mur_sig_id$ENTREZID[!is.na(WG_mur_sig_id$ENTREZID)])
WG_mur_universe <- unique(back_mur$ENTREZID[!is.na(back_mur$ENTREZID)])

WG_mur_ego <- enrichGO(gene          = WG_mur_genes,
                       universe      = WG_mur_universe,
                       OrgDb         = org.Mm.eg.db,
                       ont           = "ALL",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.05,
                       qvalueCutoff  = 0.05,
                       readable      = TRUE)
head(WG_mur_ego)
dim(WG_mur_ego)

# Make sure the output directory exists before writing the result table.
dir.create("Enrichment", showWarnings = FALSE, recursive = TRUE)
write.table(as.data.frame(WG_mur_ego),
            file = "Enrichment/GO_clusterProfiler_WGCNA_murinae.tsv",
            sep = "\t", quote = FALSE)




