##########################################################
##########################################################
#####   Python code to convert protein alignments  #######
##########   into codon alignment using  #################
########    guidance alignement filtering    #############
##########################################################

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct  3 17:58:14 2019

@author: bfouks
"""
### NB: tabs (\t) == 4 spaces in python script

import os, os.path
from Bio import AlignIO
from Bio import SeqIO
from Bio.Data import CodonTable
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
import collections

standard_table = CodonTable.unambiguous_dna_by_id[1]
stpcdn = standard_table.stop_codons


def matchDNA_AA(filaln,dnaseq,aaseq,seqid,fltcol):
    codon_table = {
            'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
            'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
            'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
            'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
            'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
            'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
            'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
            'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
            'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
            'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
            'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
            'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
            'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
            'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
            'TAC':'Y', 'TAT':'Y', 'TAA':'_', 'TAG':'_',
            'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W'} # import standard codon table
    postchck = False
    extrarem = []
    errors = [] # create list to put all errors
    nsymbol = ['A','T','C','G','N'] # list of accepted nucleotide symbol
    if len([i for i in dnaseq if i not in nsymbol]) > 0: # check there is no ambiguous nucleotide code
        errors.append("Ambiguous nucleotide code present file: %s, seq: %s, position: %s=%s" % (filaln,seqid))
        postchck == TRUE # need to be checked
    codons = [dnaseq[n:n+3] for n in range(0,len(dnaseq),3)] # parse cds seq by codon
    if stpcdn in codons: # check if stop codon in cds
        errors.append("Stop codon present file: %s, seq: %s, position: %s=%s\n" % (filaln,seqid))
	postchck == TRUE # need to be checked
    if len(codons) != len(aaseq.replace('-','')): # check that length of cds match length of protein seq
        errors.append("Protein and cds seqs length differ: %s, seq: %s, position: %s=%s" % (filaln,seqid))
	postchck == TRUE # need to be checked
    gdseq = "" # create empty string to append codons
    x = 0 # position of codons
    z = 1 # position of filtered aa in guidance file
    y = 1 # position of errors
    newnuc = [] # create list of all aa from alignment to check good translation
    newseq = [] # create a list of all codons to check good translation
    for nuc in aaseq:            
        if nuc == "-": # gap in protein alignment file
            newseq.append("---") # gaps for codons
            newnuc.append("-")
            if z in fltcol: # check that aa pass filtering test from guidance
                gdseq = gdseq + "---" # if yes then append codon alignment
                y += 1
        else: # nog gaps
            if "N" in codons[x]: # check if undefined nucleotidein codon
                if str(codons[x]).startswith(("AC","CT","CC","CG","GT","GC","GG","TC")): # check if undefined nucleotide (N) does not change aa
                    codnew = str(codons[x]).replace("N","A") # no change of aa then a a random nucleotide to match both original aa with translated aa
                    trslt_nuc = codon_table[codnew] # translate codon into aa
                    if nuc != trslt_nuc: # the original aa is NOT similar to the translated one raise error and append 
                        errors.append("problem N for aln file: %s, seq: %s, position: %s=%s ==> " % (filaln,seqid,z,y) + nuc + "::" + trslt_nuc + "->" + str(codons[x]))
                        postchck = True
                else: # presence of undefined nucleotide (N) lead to undefind aa 
                    trslt_nuc = "X" # translated aa is undefined
            else:
                trslt_nuc = codon_table[codons[x]] ## no Ns so translate codon into aa
            if z in fltcol: # check that aa pass filtering test from guidance 
                gdseq = gdseq + codons[x] # if yes append original codon
                if nuc != trslt_nuc: # original aa and translated codon does NOT match raise error 
                    errors.append("problem for aln file: %s, seq: %s, position: %s=%s" % (filaln,seqid,z,y) + nuc + "::" + trslt_nuc + "->" + str(codons[x]))
                    postchck = True ## state that sequences need a post check in order to match protein and cds seqs
                y += 1
            newnuc.append(trslt_nuc)
            newseq.append(codons[x])
            x += 1
        z += 1            
    return gdseq, newnuc, newseq, postchck, errors

def prot2cdn(pathguidance,pathcds,pathcdsaln):
    for files in os.listdir(pathguidance):
        remvcol = open("%s%s/MSA.PRANK.Guidance2_res_pair_col.scr.csv" % (pathguidance,files)) # iterate over guidnace filtering columns to find unreliable positions
        with remvcol as f:
            next(f)    
            fltcol = [int(line.split(",")[0]) for line in remvcol if float(line.split(",")[-1].strip()) >= 0.93] # create list of unreliable alignment posiitons
        remvcol.close()
        cdsaln = open("%s%s.fasta" % (pathcds,files)) # open raw cds file
        cds_trsl = SeqIO.parse(cdsaln, "fasta") # iterate over cds file with SeqIO
        cds_t = AlignIO.read(cdsaln, "fasta") ## parse cds file to check number of seqs
        lencds = len(cds_t) # get number of seqs
        o = 0 # check that all seqs have been correctly transform according to protein alignment
        fileout = open("%s%s.fa" % (pathcdsaln,files),"w") ## write file of cds aligment
        for cdsseq in cds_trsl:
            seqid = cdsseq.id  # store id of sequence to match with id of protein alignment
            seque = cdsseq.seq # sequence as string
            if "ATG" in seque[:3] or "CTG" in seque[:3] or "TTG" in seque[:3]:  ## check if codon seq start with start codon needed for .translate() function
                newseq = SeqRecord(Seq(seque), id = cdsseq.id)
                protslt = newseq.translate(table="Standard",to_stop=False) ## translate codon seq into protein using standard code to check match with protein alignment
            else: ## if no start codon add start codon and then remove it at the end
                sequ = "ATG" + seque
                newseq = SeqRecord(Seq(sequ), id = cdsseq.id)
                protslt = newseq.translate(table="Standard",to_stop=False)
                protslt = protslt[1:] # remove added start codon
            gdfltaln = open("%s%s/UserMSA" % (pathguidance,files)) ## import protein alignment from guidance file
            aln = SeqIO.parse(gdfltaln,"fasta") ## parse it with SeqIO
            for record in aln:
                if record.id == seqid: ## match cds seq id with protein alignment seq id  
                    aaseq = str(record.seq) # seq as string
                    mtchseqs = matchDNA_AA(files,newseq,aaseq,protslt.id,fltcol)
                    if mtchseqs[3] == True:
                        o += 1 # specificy that seqs has passed correctly from protein alignment to codon alignment
            fileout.write(">"+ str(seqid) + "\n" + str(mtchseqs[0]) + "\n")
        fileout.close()
        if o != lencds: # all codon seqs could not be aligned according to protein alignment 
            os.system("rm %s%s.fa" % (pathcdsaln,files)) ## remove alignments

prot2cdn('/user/Documents/PAML/guidance/','/user/Documents/PAML/raw_cds/','/user/Documents/PAML/aln_cds/')    
        

##########################################################
##########################################################
#####   R code to run functional category analyses #######
##########   using SUMSTAT methods  ######################
##########################################################
##########################################################

#!/usr/bin/env Rscript
require(topGO)
require(Rgraphviz)
require(permute)
require(qvalue)
require(ViSEAGO)

###########################
#### SUMSTAT method #######
###########################
## Category test based on score sums (home-made) Create home made SUMSTAT analysis
if(!isGeneric("GOSumT"))
  setGeneric("GOSumT", function(object) standardGeneric("GOSumT"))

setMethod("GOSumT", "classicScore",
          function(object) {

            nG <- numMembers(object)
            if(nG == 0) return(1)
            N <- sqrt(nG*var(allScore(object,FALSE)))

            ## compute the group sum (statistic)
            sObserved <- sum(membersScore(object))
	    zscore <- (sObserved -nG*mean(allScore(object,FALSE))) / N  
            ## 
            return(ifelse(zscore < 0,pnorm(zscore)*2,(1-pnorm(zscore))*2))
	    })

if(!isGeneric("Direct"))
  setGeneric("Direct", function(object) standardGeneric("Direct"))

setMethod("Direct", "classicScore",
          function(object) {

            nG <- numMembers(object)
            if(nG == 0) return(1)
            N <- sqrt(nG*var(allScore(object,FALSE)))

            ## compute the group sum (statistic)
            sObserved <- sum(membersScore(object))
	    zscore <- (sObserved -nG*mean(allScore(object,FALSE))) / N  
            ## 
            return(ifelse(zscore < 0,-1,+1))
	    })


### HERE define the significance threshold which is basically the LRT value for which FDR is < 0.1
siglvl <- 17.28^(1/4) # m8 = 12.55^(1/4) 
###############################
##### Cellular Component ######
###############################


adpsumstc <- read.csv("aADscor_sumstat_GO_C.csv") # import GO file for cellular component from AD branch
adpsumstc$count <- rep(1,length(adpsumstc[,1])) # create a column for counting how many GO category a gene has
l <- tapply(adpsumstc$count,adpsumstc$GOterm,sum) # calculate the total number of gene per GO term
adpsumstc$counttot <- l[adpsumstc$GOterm] # append this total number of genes per GO term in table

head(adpsumstc)

## Format data for running analysis
GO2geneIDc <- adpsumstc[,2] 
names(GO2geneIDc) = as.character(adpsumstc[,1])
geneID2GOc <- inverseList(GO2geneIDc)

## create function to define which genes are significant
int.genes <- function(allScore) {
	return(allScore > siglvl)
}

## get the LRT fourth root for a specific gene
all.genesc <- tapply(adpsumstc$score,as.character(adpsumstc$GeneF),mean)
## create a subset of significant gene
selgenec <- all.genesc[all.genesc > siglvl]
## import data in topGO
go.objc <- new("topGOdata", ontology='CC', allGenes = all.genesc, geneSel = int.genes,
	annot = annFUN.gene2GO, gene2GO = geneID2GOc, nodeSize = 10)
## run SUMSTAT analysis
test.state <- new("elimScore", testStatistic = GOSumT, name = "fisher",scoreOrder = "decreasing")
resumTec <- getSigGroups(go.objc,test.state)
showSigOfNodes(go.objc,score(resumTec),firstSigNodes = 5, useInfo = ' all ' )
printGraph(go.objc,resumTec, firstSigNodes = 5, fn.prefix = "tGO_CC_AD_no_wz_10n", useInfo = "all", pdfSW = TRUE)
scTec <- score(resumTec)
## subset of significant GOs
scec <- scTec[scTec <0.05]

## run test to get the direction of significance if enriched or depleted
direc.teste <- new("elimScore", testStatistic = Direct, name = "fisher",scoreOrder = "decreasing")
resumDec <- getSigGroups(go.objc,direc.teste)
scDec <- score(resumDec)

## run SUMSTAT analysis without the elim algorithm
test.statc <- new("classicScore", testStatistic = GOSumT, name = "fisher",scoreOrder = "decreasing")
resumTcc <- getSigGroups(go.objc,test.statc)
scTcc <- score(resumTcc)
## subset of significant GOs
sccc <- scTcc[scTcc <0.05]
## run FDR
FDRelimc <- qvalue(score(resumTec),pfdr = TRUE)
pvalFDRc <- FDRelimc$lfdr

tnodc <- ifelse(length(pvalFDRc[pvalFDRc < 0.2]) < 2, 3,length(pvalFDRc[pvalFDRc < 0.2]))

## Get table of significant results
allResc <- GenTable(go.objc, classic = resumTcc, KS = resumTec, orderBy = "KS", ranksOf = "classic", topNodes = tnodc)
allResc

## Get all results
resultsc <- as.data.frame(allResc)
resultsc$SumStfdr <- pvalFDRc[resultsc$GO.ID]
resultsc$dir <- ifelse(scDec[resultsc$GO.ID] < 0,"-","+")
resultsc

## Get mean score of GO depending on its number of genes
alnodc <- length(pvalFDRc)

aResc <- GenTable(go.objc, classic = resumTcc, KS = resumTec, orderBy = "KS", ranksOf = "classic", topNodes = alnodc)

alresc <- as.data.frame(aResc)
alresc$SumStfdr <- pvalFDRc[alresc$GO.ID]
alresc$dir <- ifelse(scDec[alresc$GO.ID] < 0,"-","+")
allSc <- geneScore(go.objc, use.names = TRUE)
GOscorec <- list()
for(i in 1:alnodc){
		GOid = alresc[i,1]
		groupMembers <- genesInTerm(go.objc, GOid)[[1]]
		GOscorec[i] <- sum(allSc[names(allSc) %in% groupMembers])
	}
alresc$GO_score <- unlist(GOscorec)
alresc$ratio_GOscore <- alresc$GO_score/alresc$Annotated

## Create graphs to check up results and significance of GOs in function of LRT values of its genes
allSc <- geneScore(go.objc, use.names = TRUE)
gonsigc <- ifelse(length(resultsc[resultsc$SumStfdr < 0.2,1]) < 1,2,length(resultsc[resultsc$SumStfdr < 0.2,1]))
gosigc <- resultsc[c(1:gonsigc),]
GOgec <- list()
for(i in 1:length(gosigc[,1])){
		GOid = gosigc[i,1]
		groupMembers <- genesInTerm(go.objc, GOid)[[1]]
		GOgec[i] <- list(allSc[names(allSc) %in% groupMembers])
	}
names(GOgec) <- gosigc[,1]
pdf("Check_density_of_sig_GOs_CC_no_AD_wz_10n.pdf",width=6.5,height=6)
for(i in 1:length(gosigc[,1])){
	plot(density(GOgec[[i]]))
	}
dev.off()
GO_idc <- list()
GenFc <- list()
Scorgc <- list()
for(i in 1:length(gosigc[,1])){
	goid <- rep(gosigc[i,1],length(GOgec[[i]]))
	GO_idc <- c(GO_idc,goid)
	genf <- names(GOgec[[i]])
	GenFc <- c(GenFc,genf)
	scg <- GOgec[[i]]
	Scorgc <- c(Scorgc,scg)
	}

gGO_idc <- unlist(GO_idc)
gGenFc <- unlist(GenFc)
gScorgc <- unlist(Scorgc)
chec <- cbind(gGO_idc,gGenFc,gScorgc)
write.csv(chec,"SigGOs_GF&scor_AD_no_wz_10n_C.csv")


###############################
##### Molecular Function ######
###############################

adpsumstf <- read.csv("aADscor_sumstat_GO_F.csv")
adpsumstf$count <- rep(1,length(adpsumstf[,1]))
l <- tapply(adpsumstf$count,adpsumstf$GOterm,sum)
adpsumstf$counttot <- l[adpsumstf$GOterm]

GO2geneIDf <- adpsumstf[,2]
names(GO2geneIDf) = as.character(adpsumstf[,1])
geneID2GOf <- inverseList(GO2geneIDf)

int.genes <- function(allScore) {
	return(allScore > siglvl)
}

all.genesf <- tapply(adpsumstf$score,adpsumstf$GeneF,mean)

selgenef <- all.genesf[all.genesf > siglvl]
go.objf <- new("topGOdata", ontology='MF', allGenes = all.genesf, geneSel = int.genes,
	annot = annFUN.gene2GO, gene2GO = geneID2GOf, nodeSize = 10)

test.state <- new("elimScore", testStatistic = GOSumT, name = "fisher",scoreOrder = "decreasing")
resumTef <- getSigGroups(go.objf,test.state)
showSigOfNodes(go.objf,score(resumTef),firstSigNodes = 5, useInfo = ' all ' )
printGraph(go.objf,resumTef, firstSigNodes = 5, fn.prefix = "tGO_MF_AD_no_wz_10n", useInfo = "all", pdfSW = TRUE)
scTef <- score(resumTef)
scef <- scTef[scTef <0.05]

test.statc <- new("classicScore", testStatistic = GOSumT, name = "fisher",scoreOrder = "decreasing")
resumTcf <- getSigGroups(go.objf,test.statc)
scTcf <- score(resumTcf)
sccf <- scTcf[scTcf <0.05]

direc.teste <- new("elimScore", testStatistic = Direct, name = "fisher",scoreOrder = "decreasing")
resumDef <- getSigGroups(go.objf,direc.teste)
scDef <- score(resumDef)

FDRelimf <- qvalue(score(resumTef),pfdr = TRUE)
pvalFDRf <- FDRelimf$lfdr

tnodf <- ifelse(length(pvalFDRf[pvalFDRf < 0.2]) < 2, 3,length(pvalFDRf[pvalFDRf < 0.2]))

allResf <- GenTable(go.objf, classic = resumTcf, KS = resumTef, orderBy = "KS", ranksOf = "classic", topNodes = tnodf)
allResf

resultsf <- as.data.frame(allResf)
resultsf$SumStfdr <- pvalFDRf[resultsf$GO.ID]
resultsf$dir <- ifelse(scDef[resultsf$GO.ID] < 0,"-","+")
resultsf

alnodf <- length(pvalFDRf)

aResf <- GenTable(go.objf, classic = resumTcf, KS = resumTef, orderBy = "KS", ranksOf = "classic", topNodes = alnodf)

alresf <- as.data.frame(aResf)
alresf$SumStfdr <- pvalFDRf[alresf$GO.ID]
alresf$dir <- ifelse(scDef[alresf$GO.ID] < 0,"-","+")
allSf <- geneScore(go.objf, use.names = TRUE)
GOscoref <- list()
for(i in 1:alnodf){
		GOid = alresf[i,1]
		groupMembers <- genesInTerm(go.objf, GOid)[[1]]
		GOscoref[i] <- sum(allSf[names(allSf) %in% groupMembers])
	}
alresf$GO_score <- unlist(GOscoref)
alresf$ratio_GOscore <- alresf$GO_score/alresf$Annotated

allSf <- geneScore(go.objf, use.names = TRUE)
gonsigf <- ifelse(length(resultsf[resultsf$SumStfdr < 0.2,1]) < 1,2,length(resultsf[resultsf$SumStfdr < 0.2,1]))
gosigf <- resultsf[c(1:gonsigf),]
GOgef <- list()
for(i in 1:length(gosigf[,1])){
		GOid = gosigf[i,1]
		groupMembers <- genesInTerm(go.objf, GOid)[[1]]
		GOgef[i] <- list(allSf[names(allSf) %in% groupMembers])
	}
names(GOgef) <- gosigf[,1]
pdf("Check_density_of_sig_GOs_MF_no_AD_wz_10n.pdf",width=6.5,height=6)
for(i in 1:length(gosigf[,1])){
	plot(density(GOgef[[i]]))
	}
dev.off()
GO_idf <- list()
GenFf <- list()
Scorgf <- list()
for(i in 1:length(gosigf[,1])){
	goid <- rep(gosigf[i,1],length(GOgef[[i]]))
	GO_idf <- c(GO_idf,goid)
	genf <- names(GOgef[[i]])
	GenFf <- c(GenFf,genf)
	scg <- GOgef[[i]]
	Scorgf <- c(Scorgf,scg)
	}

gGO_idf <- unlist(GO_idf)
gGenFf <- unlist(GenFf)
gScorgf <- unlist(Scorgf)
chef <- cbind(gGO_idf,gGenFf,gScorgf)
write.csv(chef,"SigGOs_GF&scor_AD_no_wz_10n_F.csv")

#################################
##### Biological Processes ######
#################################
adpsumstp <- read.csv("aADscor_sumstat_GO_P.csv")
adpsumstp$count <- rep(1,length(adpsumstp[,1]))
l <- tapply(adpsumstp$count,adpsumstp$GOterm,sum)
adpsumstp$counttot <- l[adpsumstp$GOterm]

GO2geneIDp <- adpsumstp[,2]
names(GO2geneIDp) = as.character(adpsumstp[,1])
geneID2GOp <- inverseList(GO2geneIDp)

int.genes <- function(allScore) {
	return(allScore > siglvl)
}

all.genesp <- tapply(adpsumstp$score,adpsumstp$GeneF,mean)

selgenep <- all.genesp[all.genesp > siglvl]
go.objp <- new("topGOdata", ontology='BP', allGenes = all.genesp, geneSel = int.genes,
	annot = annFUN.gene2GO, gene2GO = geneID2GOp, nodeSize = 10)

test.state <- new("elimScore", testStatistic = GOSumT, name = "fisher",scoreOrder = "decreasing")
resumTep <- getSigGroups(go.objp,test.state)
showSigOfNodes(go.objp,score(resumTep),firstSigNodes = 5, useInfo = ' all ' )
printGraph(go.objp,resumTep, firstSigNodes = 5, fn.prefix = "tGO_BP_AD_no_wz_10n", useInfo = "all", pdfSW = TRUE)
scTep <- score(resumTep)
scep <- scTep[scTep <0.05]

test.statc <- new("classicScore", testStatistic = GOSumT, name = "fisher",scoreOrder = "decreasing")
resumTcp <- getSigGroups(go.objp,test.statc)
scTcp <- score(resumTcp)
sccp <- scTcp[scTcp <0.05]

direc.teste <- new("elimScore", testStatistic = Direct, name = "fisher",scoreOrder = "decreasing")
resumDep <- getSigGroups(go.objp,direc.teste)
scDep <- score(resumDep)


allGOres <- c(scTec,scTef,scTep)

FDRelimp <- qvalue(scTep,pfdr = TRUE)
pvalFDRp <- FDRelimp$lfdr

tnodp <- ifelse(length(pvalFDRp[pvalFDRp < 0.2]) < 2, 3,length(pvalFDRp[pvalFDRp < 0.2]))

allResp <- GenTable(go.objp, classic = resumTcp, KS = resumTep, orderBy = "KS", ranksOf = "classic", topNodes = tnodp)
allResp

resultsp <- as.data.frame(allResp)
resultsp$SumStfdr <- pvalFDRp[resultsp$GO.ID]
resultsp$dir <- ifelse(scDep[resultsp$GO.ID] < 0,"-","+")
resultsp

alnodp <- length(pvalFDRp)

aResp <- GenTable(go.objp, classic = resumTcp, KS = resumTep, orderBy = "KS", ranksOf = "classic", topNodes = alnodp)

alresp <- as.data.frame(aResp)
alresp$SumStfdr <- pvalFDRp[alresp$GO.ID]
alresp$dir <- ifelse(scDep[alresp$GO.ID] < 0,"-","+")
allSp <- geneScore(go.objp, use.names = TRUE)
GOscorep <- list()
for(i in 1:alnodp){
		GOid = alresp[i,1]
		groupMembers <- genesInTerm(go.objp, GOid)[[1]]
		GOscorep[i] <- sum(allSp[names(allSp) %in% groupMembers])
	}
alresp$GO_score <- unlist(GOscorep)
alresp$ratio_GOscore <- alresp$GO_score/alresp$Annotated

allSp <- geneScore(go.objp, use.names = TRUE)
gonsigp <- ifelse(length(resultsp[resultsp$SumStfdr < 0.2,1]) < 1,2,length(resultsp[resultsp$SumStfdr < 0.2,1]))
gonsigp
gosigp <- resultsp[c(1:gonsigp),]
GOgep <- list()
for(i in 1:length(gosigp[,1])){
		GOid = gosigp[i,1]
		groupMembers <- genesInTerm(go.objp, GOid)[[1]]
		GOgep[i] <- list(allSp[names(allSp) %in% groupMembers])
	}
names(GOgep) <- gosigp[,1]
pdf("Check_density_of_sig_GOs_BP_no_AD_wz_10n.pdf",width=6.5,height=6)
for(i in 1:length(gosigp[,1])){
	plot(density(GOgep[[i]]))
	}
dev.off()
GO_idp <- list()
GenFp <- list()
Scorgp <- list()
for(i in 1:length(gosigp[,1])){
	goid <- rep(gosigp[i,1],length(GOgep[[i]]))
	GO_idp <- c(GO_idp,goid)
	genf <- names(GOgep[[i]])
	GenFp <- c(GenFp,genf)
	scg <- GOgep[[i]]
	Scorgp <- c(Scorgp,scg)
	}

gGO_idp <- unlist(GO_idp)
gGenFp <- unlist(GenFp)
gScorgp <- unlist(Scorgp)
chep <- cbind(gGO_idp,gGenFp,gScorgp)
write.csv(chep,"SigGOs_GF&scor_AD_no_wz_10n_P.csv")

## COMPILE results from GO analysis of CC, MF and BP 
allresults <- rbind(resultsc,resultsf,resultsp)
allresults$Process <- c(rep("CC",length(resultsc[,1])),rep("MF",length(resultsf[,1])),rep("BP",length(resultsp[,1])))

write.table(allresults,"results_GO_AD_no_wz_10n.csv", sep="\t")

alreslts <- rbind(alresc,alresf,alresp)
alreslts$Process <- c(rep("CC",length(alresc[,1])),rep("MF",length(alresf[,1])),rep("BP",length(alresp[,1])))

write.table(alreslts,"all_results_GO_AD_no_wz_10n.csv", sep="\t")