# Report the current working directory; the file paths used below are relative.
getwd()

library(ggplot2)
library(tidyverse)
library(ggchicklet) 
library(hrbrthemes)
library(clusterProfiler)
library(enrichplot)
library(openxlsx)

# Load the annotation table: tab-separated, first row used as the header,
# strings kept as character vectors.
egg <- read.csv(
  "5.Gene_annotation/Lnose.eggout.emapper.annotations",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

# The rest of the script reads these columns by name. Fail early with a clear
# message if any is missing (e.g. the header was imported under another name).
required_cols <- c("query", "GOs", "KEGG_ko", "KEGG_Pathway")
missing_cols <- setdiff(required_cols, colnames(egg))
if (length(missing_cols) > 0) {
  stop(
    "Annotation table is missing column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
head(egg)

### 1.GO Annotation ----
# Build long-format GO tables from the comma-separated `GOs` column of `egg`.
gene_ids <- egg$query
eggnog_annotations_go <- str_split(egg$GOs, ",")

# One row per (gene, GO term). lengths() is the type-stable equivalent of
# sapply(x, length). Rows whose term is "-" are dropped.
gene_to_go <- data.frame(
  gene = rep(gene_ids, times = lengths(eggnog_annotations_go)),
  term = unlist(eggnog_annotations_go)
)
gene2go <- filter(gene_to_go, term != "-")

# Term-first layout with the term column named "ID", used as the join key.
term2gene <- gene2go[, c("term", "gene")]
colnames(term2gene)[1] <- "ID"

# Look up term names and ontology classes once per distinct GO ID rather than
# once per gene-term row.
go_ids <- unique(term2gene$ID)
go_terms <- go2term(go_ids)
go_classes <- go2ont(go_ids)
colnames(go_terms)[1] <- "ID"
colnames(go_classes)[1] <- "ID"

go_annot <- term2gene %>%
  left_join(go_terms, by = "ID") %>%
  left_join(go_classes, by = "ID")
# The fourth column is the one contributed by go2ont().
# NOTE(review): this assumes go2term() returns exactly two columns - confirm.
colnames(go_annot)[4] <- "Class"

# Term -> gene and term -> name tables, each carrying the ontology class.
gid2gene <- go_annot[, c("ID", "gene", "Class")]
gid2name <- go_annot[, c("ID", "Term", "Class")]

# Read the gene IDs of interest: first column of a headerless file.
gene <- read.table("12.Marco_MicroEVO/micro.id", header = FALSE)
gene <- gene$V1

# Enrichment test of the gene list against the full GO term table.
ego <- enricher(
  gene,
  TERM2GENE = gid2gene,
  TERM2NAME = gid2name,
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2,
  minGSSize = 10
)

# Convert the result once; show it and export it as a one-sheet workbook.
ego_table <- as.data.frame(ego)
ego_table
sheets <- list("LnoseMicroGO" = ego_table)
# NOTE(review): this workbook goes to the working directory, while the final
# workbook at the end of the script goes to 12.Marco_MicroEVO/ - confirm that
# both outputs are wanted.
write.xlsx(sheets, "Lnose.micro.enrich.xlsx")
###############################################
dotplot(ego)
##################
# Split the GO tables by ontology class so each class is tested separately.
gid2name <- split(gid2name, gid2name$Class)
gid2gene <- split(gid2gene, gid2gene$Class)

# Run enricher() for a single ontology class.
#   genes              - vector of gene IDs to test
#   term2gene_by_class - list of term/gene tables, one element per class
#   term2name_by_class - list of term/name tables, one element per class
#   ontology           - name of the list element to use ("BP", "CC", "MF")
# Only the first two columns of each per-class table are passed on.
enrich_go_class <- function(genes, term2gene_by_class, term2name_by_class,
                            ontology) {
  enricher(
    genes,
    TERM2GENE = term2gene_by_class[[ontology]][c(1, 2)],
    TERM2NAME = term2name_by_class[[ontology]][c(1, 2)],
    pvalueCutoff = 0.05,
    qvalueCutoff = 0.2,
    minGSSize = 10
  )
}

ego_BP <- enrich_go_class(gene, gid2gene, gid2name, "BP")
ego_CC <- enrich_go_class(gene, gid2gene, gid2name, "CC")
ego_MF <- enrich_go_class(gene, gid2gene, gid2name, "MF")

# Build each dot plot once, with a consistent title, and reuse the objects for
# both the individual displays and the combined figure.
BP <- dotplot(ego_BP, title = "BP")
CC <- dotplot(ego_CC, title = "CC")
MF <- dotplot(ego_MF, title = "MF")

BP
cnetplot(ego_BP)
CC
MF

# Stack the three dot plots in a single column.
library(ggpubr)
ggarrange(BP, CC, MF, ncol = 1, nrow = 3, align = "hv")
#######################################################
# KEGG annotation taken from the `KEGG_ko` and `KEGG_Pathway` columns of `egg`.

# Gene -> KO table: strip the "ko:" prefix, split the comma-separated lists and
# keep one row per (gene, KO). Rows whose term is "-" are dropped.
gene_ko <- egg %>% dplyr::select(GID = query, Ko = KEGG_ko)
gene_ko$Ko <- gsub("ko:", "", gene_ko$Ko)
gene_ko_list <- str_split(gene_ko$Ko, ",")
gene2ko <- data.frame(
  gene = rep(gene_ko$GID, times = lengths(gene_ko_list)),
  term = unlist(gene_ko_list)
)
gene2ko <- filter(gene2ko, term != "-")

# Gene -> pathway table, built the same way. The gene IDs come from this
# table's own GID column, so the section does not depend on variables created
# by the GO section.
gene_pathway <- egg %>%
  dplyr::select(GID = query, Pathway = KEGG_Pathway)
gene_pathway_list <- str_split(gene_pathway$Pathway, ",")
gene2pathway <- data.frame(
  gene = rep(gene_pathway$GID, times = lengths(gene_pathway_list)),
  term = unlist(gene_pathway_list)
)
gene2pathway <- filter(gene2pathway, term != "-")

# Term-first layout with the term column named "ID".
term2gene <- gene2pathway[, c("term", "gene")]
colnames(term2gene)[1] <- "ID"

# Only IDs starting with "ko" are used for the enrichment.
ko2gene <- term2gene[grepl("^ko", term2gene$ID), ]

# Look up names only for the IDs that are actually tested, once each.
pathway2name <- ko2name(unique(ko2gene$ID))
pathway2name <- na.omit(pathway2name)
pathway2name <- unique(pathway2name)
colnames(pathway2name)[1] <- "ID"

kegg <- enricher(
  gene,
  TERM2GENE = ko2gene,
  TERM2NAME = pathway2name,
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2,
  minGSSize = 10
)
kegg_result <- as.data.frame(kegg)
#########################################
# KEGG enrichment through a KO -> pathway mapping loaded from kegg_info.RData.
library(stringr)

# One row per (gene, KO). `KEGG_ko` can hold several comma-separated KOs for a
# gene; they are split into separate rows so each KO can be matched on its own
# in the join below. A combined "K1,K2" string would match nothing and the gene
# would be dropped.
# NOTE(review): assumes ko2pathway holds one KO per row - confirm against
# kegg_info.RData.
koterms <- egg %>%
  dplyr::select(GID = query, KO = KEGG_ko) %>%
  na.omit() %>%
  filter(str_detect(KO, "ko")) %>%
  mutate(KO = str_replace_all(KO, "ko:", "")) %>%
  tidyr::separate_rows(KO, sep = ",") %>%
  distinct()
head(koterms)

# load() returns the names of the objects it restores. Check that both mapping
# tables really came from the file: a `pathway2name` may already exist in the
# workspace and would otherwise be used silently.
loaded_objects <- load("kegg_info.RData")
stopifnot(all(c("ko2pathway", "pathway2name") %in% loaded_objects))
head(ko2pathway)
head(pathway2name)
write.table(
  pathway2name,
  file = "12.Marco_MicroEVO/pathway2name.txt",
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)
colnames(ko2pathway) <- c("KO", "Pathway")

# Gene -> pathway pairs. distinct() removes the repeats that arise when
# several KOs of one gene map to the same pathway.
gene2pathway <- koterms %>%
  left_join(ko2pathway, by = "KO") %>%
  dplyr::select(GID, Pathway) %>%
  na.omit() %>%
  distinct()
gene2pathway_name <- left_join(gene2pathway, pathway2name, by = "Pathway")
write.table(
  gene2pathway_name,
  file = "12.Marco_MicroEVO/gene2pathway_name.txt",
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)

# Read the table back exactly as written. The file was saved unquoted, so
# quote and comment characters inside pathway names must not be interpreted.
kegg <- read.table(
  "12.Marco_MicroEVO/gene2pathway_name.txt",
  header = TRUE,
  sep = "\t",
  quote = "",
  comment.char = ""
)
# Column 1 is the gene, column 2 the pathway, column 3 the first column
# contributed by pathway2name.
kegg2gene <- kegg[, c(2, 1)]
kegg2name <- kegg[, c(2, 3)]

c1kegg <- enricher(
  gene,
  TERM2GENE = kegg2gene,
  TERM2NAME = kegg2name,
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2
)
c1kegg_table <- as.data.frame(c1kegg)
c1kegg_table
dotplot(c1kegg)

# Export the GO and KEGG enrichment tables as two sheets of one workbook.
sheets <- list(
  "LnoseMicroGO" = as.data.frame(ego),
  "kegg" = c1kegg_table
)
write.xlsx(sheets, "12.Marco_MicroEVO/Lnose.micro.enrich.xlsx")









