## Plot overlap between GO terms and CHD8-repressed genes.

## Build the repressed gene set: the sorted intersection of two gene lists.
## The result is saved to gene_repressed.txt (one gene per line) and then
## read back into `data_gene`, which the rest of the script works from.

genes_first <- scan(file = "gene_T_1.txt", what = "character", sep = "\t")
genes_second <- scan(file = "gene_T145_up.txt", what = "character", sep = "\t")

# Genes present in both lists, in sorted order.
genes_shared <- sort(intersect(genes_first, genes_second))

write.table(
    genes_shared,
    file = "gene_repressed.txt",
    sep = "\t",
    quote = FALSE,
    col.names = FALSE,
    row.names = FALSE
)

# Reload from disk so downstream steps use exactly what was written.
data_gene <- scan(file = "gene_repressed.txt", what = "character", sep = "\t")

## Test every GO term for over-representation among the repressed genes.
##
## For each GO identifier listed in GO_3.txt, the term's gene list is read from
## ./GO/<id>.txt (with ":" in the identifier replaced by "_"), a one-sided
## Fisher's exact test (alternative = "greater") is run on the 2x2 overlap
## table, and one row per term is written to overlap_repressed.txt:
##   1 term id            5 overlap / term size
##   2 odds ratio         6 term size
##   3 p-value            7 overlap / repressed-set size
##   4 overlap size       8 repressed-set size

data_term <- scan("GO_3.txt", what = "character", sep = "\t")

if (length(data_term) == 0L) {
    stop("GO_3.txt contains no GO terms", call. = FALSE)
}

# Total number of genes assumed when filling the "in neither set" cell.
# NOTE(review): presumably the size of the background gene universe for this
# experiment -- confirm before reusing the script on another data set.
n_universe <- 9168

n_gene <- length(data_gene)

# Rows are collected here and written in one call after the loop. The previous
# per-term write with append = TRUE never reset the file, so re-running the
# script stacked duplicate rows onto the old output and distorted the FDR step.
overlap_rows <- matrix(NA_character_, nrow = length(data_term), ncol = 8)

for (i in seq_along(data_term)) {
    term <- data_term[i]

    # File names use "_" where the identifier has ":".
    file_name <- gsub(":", "_", paste0("./GO/", term, ".txt"))

    genes_go <- scan(file_name, what = "character", sep = "\t")

    n_go <- length(genes_go)
    n_overlap <- length(intersect(data_gene, genes_go))
    n_union <- length(union(data_gene, genes_go))

    # 2x2 table: [1,1] in both sets, [1,2] repressed only,
    #            [2,1] GO term only, [2,2] in neither (universe minus union).
    # NOTE(review): n_go counts duplicates if a GO file lists a gene twice,
    # whereas the overlap and union are de-duplicated -- verify the inputs.
    overlap_table <- matrix(
        c(n_overlap, n_gene - n_overlap,
          n_go - n_overlap, n_universe - n_union),
        nrow = 2, byrow = TRUE
    )

    enrichment <- fisher.test(overlap_table, alternative = "greater")

    # c() coerces the numbers to text exactly as the per-row write used to.
    overlap_rows[i, ] <- c(
        term,
        enrichment$estimate,
        enrichment$p.value,
        n_overlap,
        n_overlap / n_go,
        n_go,
        n_overlap / n_gene,
        n_gene
    )
}

write.table(overlap_rows, file = "overlap_repressed.txt", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)

## Add Benjamini-Hochberg adjusted p-values to the per-term overlap table,
## then attach the GO annotation columns from ../GO_overlap/GO.txt.

# Per-term results; the third column is treated as the raw p-value.
overlap_raw <- read.table(file = "overlap_repressed.txt", sep = "\t")

fdr <- p.adjust(overlap_raw[[3]], method = "BH")

# Insert the adjusted values between columns 2 and 3 of the original table.
overlap_fdr <- data.frame(
    overlap_raw[1:2],
    fdr,
    overlap_raw[3:ncol(overlap_raw)]
)

write.table(
    overlap_fdr,
    file = "overlap_repressed_1.txt",
    sep = "\t",
    quote = FALSE,
    col.names = FALSE,
    row.names = FALSE
)

# Both tables are read back as text so the merge leaves the values unchanged.
go_annotation <- read.table(
    "../GO_overlap/GO.txt",
    sep = "\t",
    quote = "",
    comment.char = "",
    colClasses = "character"
)

overlap_text <- read.table(
    "overlap_repressed_1.txt",
    sep = "\t",
    colClasses = "character"
)

# Join on the first column of each table; all.y keeps every tested term even
# when it has no row in the annotation file.
overlap_annotated <- merge(
    x = go_annotation,
    y = overlap_text,
    by = 1,
    all.y = TRUE
)

write.table(
    overlap_annotated,
    file = "overlap_repressed_2.txt",
    sep = "\t",
    quote = FALSE,
    col.names = FALSE,
    row.names = FALSE
)

## Draw a horizontal bar plot of -log10(column 4) for five selected rows of
## overlap_repressed_2.txt and save it as overlap_repressed.pdf.

go_table <- read.table("overlap_repressed_2.txt", sep = "\t", quote = "", comment.char = "")

# Move column 4 to the front: it becomes the primary sort key and the plotted
# value. NOTE(review): column 4 is presumably the BH-adjusted p-value, which
# holds only if the GO annotation file has exactly two columns -- confirm.
go_table <- data.frame(go_table[, 4], go_table[, 1:3], go_table[, 5:ncol(go_table)])

# Sort by every column in turn, starting with the one moved to the front.
go_table <- go_table[do.call(order, go_table), ]

# Rows to plot, given as positions in the sorted table (drawn bottom to top).
plot_rows <- c(17, 15, 11, 3, 2)

# Out-of-range positions would otherwise yield NA rows and a silently broken
# figure, so fail early with a clear message instead.
if (nrow(go_table) < max(plot_rows)) {
    stop(
        "overlap_repressed_2.txt has ", nrow(go_table),
        " rows but row ", max(plot_rows), " was requested for plotting",
        call. = FALSE
    )
}

go_plot <- go_table[plot_rows, ]

go_plot[, 1] <- -log10(go_plot[, 1])

# Open the device only once the data are ready, so a failure above does not
# leave an open device and an empty PDF behind.
pdf(file = "overlap_repressed.pdf", width = 8, height = 3)

# The wide left margin leaves room for the long term labels.
par(
    mar = c(2, 34.3, 4.5, 2),
    mgp = c(3.5, 1, 0),
    cex.axis = 2.5,
    cex.lab = 2.5,
    cex.main = 3
)

# barplot() returns the bar midpoints; use them for the label positions rather
# than re-deriving them from the default bar width and spacing.
bar_mid <- barplot(
    go_plot[, 1],
    beside = TRUE,
    horiz = TRUE,
    col = "#6666FF",
    main = "",
    ylab = "",
    xlim = c(0, 2),
    axes = FALSE
)

# Left axis: one label per bar, taken from column 3 of the reordered table.
axis(2, at = as.vector(bar_mid), labels = as.character(go_plot[, 3]), las = 2, cex.axis = 1.4)

# Top axis: the -log10 scale from 0 to 2.
axis(3, at = 0:2, labels = 0:2, las = 1, cex.axis = 1.5)

dev.off()
