# Set the working directory to the analysis folder. The relative paths used
# later in this script ('./Input/...', './Results/...', and the sourced
# functions file) are resolved against this directory.
# NOTE(review): both paths below are machine-specific absolute paths; setwd()
# will fail on a machine where the directory does not exist, so the active path
# must be edited (or the script launched from the project folder) before running
# elsewhere.
# Alternative location (other author's machine), kept for reference:
# setwd("/Users/bryanteefy/Dropbox/PIWI/CODE/3_Differential_Gene_Expression_Analysis")
setwd("/Users/berenice/Dropbox/Manuscripts_and_Publications/2022/2022_Bryan_PIWI_manuscript/CODE/3_Differential_Gene_Expression_Analysis")
# GO Analysis
# Original script written by Param Priya Singh
# Modified by Bryan Teefy & Bérénice Benayoun
# R version 4.1.2 (2021-11-01)
# load required libraries
library(GOstats)   # GOstats_2.60.0
library(biomaRt)   # biomaRt_2.50.3
library(GSEABase)  # GSEABase_1.56.0
library(scales)    # scales_1.2.0
library(ggplot2)   # ggplot2_3.3.6
library(dplyr)     # dplyr_1.0.9
library(ggplotify) # ggplotify_0.1.0
library(patchwork) # patchwork_1.1.1
library(pheatmap)  # pheatmap_1.0.12
# Load the helper functions used to run the enrichment analysis.
# The path is relative to the current working directory.
source("4a_Functions_for_GO_Enrich.R")
#################################################
# 1. prepare data for geneset analysis and set parameters

# Import the Killi CDS ID -> human Ensembl Gene ID conversion table.
# The file has no header row, so column names are assigned explicitly.
blast_table <- read.table(
  "./Input/Parsed_human_to_killi_BLAST_1e-3.txt",
  header = FALSE,
  sep    = "\t"
)
colnames(blast_table) <- c("killi_prot", "ensembl_gene_id")

# Import the human Ensembl Gene ID -> gene symbol conversion table and rename
# its columns so that it shares the "ensembl_gene_id" key with blast_table.
name_table <- read.csv(
  "./Input/Ens104_GeneID_GeneSymbol_HUMAN_mart_export.txt",
  header = TRUE,
  sep    = "\t"
)
colnames(name_table) <- c("ensembl_gene_id", "Human_Gene_Symbol")

# Import the Gene ID -> Killi CDS (protein) ID conversion table.
# The merge and subsetting steps below use its "protein_id" and "gene_id"
# columns, which are taken from the file's header row.
prot_table <- read.csv(
  "./Input/Parsed_GCA_014300015.1_MPIA_NFZ_2.0_Gene_to_Protein_ID_conversion.txt",
  header = TRUE,
  sep    = "\t"
)

# Merge the tables to create the conversion table:
#   step 1: attach human gene symbols to each BLAST hit (key: ensembl_gene_id)
#   step 2: attach killifish gene IDs to each killifish protein
#           (key: killi_prot in the BLAST table, protein_id in prot_table)
conversion_table.1 <- merge(blast_table, name_table, by = "ensembl_gene_id")
conversion_table.2 <- merge(
  conversion_table.1,
  prot_table,
  by.x = "killi_prot",
  by.y = "protein_id"
)

# Subset columns to retain only Killifish gene and Human Gene Symbol.
# Since this annotation version can have more than 1 protein per gene,
# deduplicate the gene/symbol pairs.
conversion_table <- unique(conversion_table.2[, c("gene_id", "Human_Gene_Symbol")])

# Output homology table for manuscript.
# The de-duplicated table is computed once and written once, without row names.
# No interactive View() calls are made, so the script can also run
# non-interactively (e.g. via Rscript).
homology_table <- unique(
  conversion_table.2[, c("killi_prot", "gene_id", "ensembl_gene_id", "Human_Gene_Symbol")]
)

# Make sure the output folder exists so write.table() does not fail on a
# fresh checkout.
if (!dir.exists("./Results")) {
  dir.create("./Results", recursive = TRUE)
}

write.table(
  homology_table,
  file      = paste0("./Results/", Sys.Date(), "_Killi_human_homology_table.txt"),
  sep       = "\t",
  quote     = FALSE,
  row.names = FALSE
)
