## Generate GO biological process terms and the associated genes using genes that are expressed in NPCs as background genes.

## Stage 1: restrict the gene-to-term annotation table to genes that are
## present in the NPC background list, and save the result as GO_1.txt.

## Annotation table; every column is read as text. Later stages treat
## column 1 as the gene and column 2 as the GO term.
go_annotation <- read.table("GOA_G2T.txt", sep = "\t", colClasses = "character")

## Background gene table; its first column is used as the join key.
background_genes <- read.table(
    "../expression/NPCs/gene_T_1.txt",
    sep = "\t",
    colClasses = "character"
)

## Inner join on the first column of each table: only annotation rows whose
## key also occurs in the background table are kept.
go_in_background <- merge(go_annotation, background_genes, by.x = 1, by.y = 1)

write.table(
    go_in_background,
    file = "GO_1.txt",
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = FALSE
)

## Stage 2: for every GO term in GO_1.txt, count how many distinct genes are
## annotated to it, and save the (term, size) table as GO_2.txt.

go_table <- read.table("GO_1.txt", sep = "\t", colClasses = "character")

## Distinct terms (column 2) in sorted order; sort() also drops NA.
data_term <- sort(unique(go_table[, 2]))

## Collapse repeated gene/term rows so that each gene is counted at most
## once per term.
gene_term_pairs <- unique(go_table[, 1:2])

## Count the pairs per term in a single pass instead of rescanning the whole
## table once per term. match() maps each row to its position in data_term,
## and tabulate() counts the positions; rows whose term is NA map to NA and
## are ignored rather than being added to every term's count.
## as.numeric() keeps the sizes as doubles, as in the original numeric()
## vector, so the file is formatted the same way.
data_size <- as.numeric(
    tabulate(match(gene_term_pairs[, 2], data_term), nbins = length(data_term))
)

term_sizes <- data.frame(data_term, data_size)

write.table(
    term_sizes,
    file = "GO_2.txt",
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = FALSE
)

## Stage 3: keep the GO terms whose gene count (column 2 of GO_2.txt) lies
## between 10 and 1000 inclusive, and save the sorted term list as GO_3.txt.

term_sizes <- read.table("GO_2.txt", sep = "\t")

## Both size bounds combined into a single row mask.
size_in_range <- term_sizes[, 2] >= 10 & term_sizes[, 2] <= 1000

## Column 1 holds the term identifiers.
selected_terms <- sort(term_sizes[size_in_range, 1])

write.table(
    selected_terms,
    file = "GO_3.txt",
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = FALSE
)

## Stage 4: for every term listed in GO_3.txt, write the sorted, distinct
## genes annotated to it (from GO_1.txt) to ./GO/<term>.txt, with ':' in the
## term replaced by '_' in the file name.

go_table <- read.table("GO_1.txt", sep = "\t", colClasses = "character")

selected_terms <- scan("GO_3.txt", what = "character", sep = "\t")

## Create the output directory only when it is missing, so that rerunning
## the script does not emit an "already exists" warning.
if (!dir.exists("GO")) {
    dir.create("GO")
}

## Iterate over the terms themselves rather than 1:length(...): when no term
## passed the size filter the vector is empty, and 1:0 would run the body for
## i = 1 and i = 0, writing a spurious NA.txt and then failing in paste().
for (term in selected_terms) {
    ## %in% never yields NA, so rows with a missing term are simply skipped
    ## instead of producing NA rows.
    term_genes <- go_table[go_table[, 2] %in% term, 1]

    term_genes <- sort(unique(term_genes))

    ## ':' is replaced across the whole path, as before.
    file_name <- paste0("./GO/", term, ".txt")
    file_name <- gsub(":", "_", file_name, fixed = TRUE)

    write.table(
        term_genes,
        file = file_name,
        quote = FALSE,
        sep = "\t",
        row.names = FALSE,
        col.names = FALSE
    )
}
