# Directory that receives the saved ortholog table at the end of this script.
path <- "/Users/agkaessmann/Projects/Anolis/Documentation_dosage_compensation"
library("biomaRt")

# Connect to the Ensembl 77 archive and select the human gene dataset in a
# single call.
ensembl <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  host = "oct2014.archive.ensembl.org"
)

# For every human gene, fetch the homolog gene ID and the orthology type in
# each of the five comparison species.
orthologs <- getBM(
  mart = ensembl,
  attributes = c(
    "ensembl_gene_id",
    # mouse
    "mmusculus_homolog_ensembl_gene",
    "mmusculus_homolog_orthology_type",
    # opossum
    "mdomestica_homolog_ensembl_gene",
    "mdomestica_homolog_orthology_type",
    # platypus
    "oanatinus_homolog_ensembl_gene",
    "oanatinus_homolog_orthology_type",
    # lizard
    "acarolinensis_homolog_ensembl_gene",
    "acarolinensis_homolog_orthology_type",
    # xenopus
    "xtropicalis_homolog_ensembl_gene",
    "xtropicalis_homolog_orthology_type"
  )
)

# Annotate every human gene with its biotype; the two tables are joined on
# their shared ensembl_gene_id column.
gene_type <- getBM(
  mart = ensembl,
  attributes = c("ensembl_gene_id", "gene_biotype")
)
orthologs <- merge(x = orthologs, y = gene_type, by = "ensembl_gene_id")

# Keep human protein-coding genes that have a 1:1 ortholog in mouse, opossum,
# platypus, lizard and Xenopus (chicken is not part of this set).
# Standard subsetting is used instead of subset(), whose non-standard
# evaluation is meant for interactive use. local() keeps the helper variables
# out of the global workspace.
orthologs <- local({
  # Stable-ID prefix expected for the homolog gene of each queried species.
  id_prefix <- c(
    mmusculus = "ENSMUS",
    mdomestica = "ENSMOD",
    oanatinus = "ENSOAN",
    acarolinensis = "ENSACA",
    xtropicalis = "ENSXET"
  )
  gene_cols <- paste0(names(id_prefix), "_homolog_ensembl_gene")
  type_cols <- paste0(names(id_prefix), "_homolog_orthology_type")

  # Fail loudly when a required column is absent; otherwise the filter below
  # would silently select zero rows.
  absent <- setdiff(
    c("ensembl_gene_id", "gene_biotype", gene_cols, type_cols),
    names(orthologs)
  )
  if (length(absent) > 0) {
    stop(
      "Missing ortholog columns: ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # grepl() yields FALSE for NA and %in% never yields NA, so `keep` is NA-free
  # and rows with missing values are dropped, exactly as subset() would do.
  keep <- grepl("^ENSG", orthologs[["ensembl_gene_id"]]) &
    orthologs[["gene_biotype"]] %in% "protein_coding"
  for (i in seq_along(id_prefix)) {
    keep <- keep &
      grepl(paste0("^", id_prefix[[i]]), orthologs[[gene_cols[i]]]) &
      orthologs[[type_cols[i]]] %in% "ortholog_one2one"
  }

  # drop = FALSE keeps the result a data frame; row names are carried over.
  orthologs[keep, , drop = FALSE]
})

# Collect the ortholog gene IDs into one column per species, labelled with the
# species' common name; row names are taken over from the filtered table.
one2one.6sp.no.chicken <- data.frame(
  human = orthologs[["ensembl_gene_id"]],
  mouse = orthologs[["mmusculus_homolog_ensembl_gene"]],
  opossum = orthologs[["mdomestica_homolog_ensembl_gene"]],
  platypus = orthologs[["oanatinus_homolog_ensembl_gene"]],
  lizard = orthologs[["acarolinensis_homolog_ensembl_gene"]],
  xenopus = orthologs[["xtropicalis_homolog_ensembl_gene"]],
  row.names = rownames(orthologs)
)

# Write the ortholog table to <path>/one2one.6sp.no.chicken.Rdata.
# Check the directory first so a wrong `path` gives a clear message instead of
# a generic connection error from save().
if (!dir.exists(path)) {
  stop("Output directory does not exist: ", path, call. = FALSE)
}
save(
  one2one.6sp.no.chicken,
  file = file.path(path, "one2one.6sp.no.chicken.Rdata")
)
