## Calculate overlap between eight NDD gene sets.

# Background gene universe. De-duplicated so that repeated entries in the input
# file cannot inflate the "in neither set" cell of the contingency tables below
# (intersect() and union() already work on distinct genes only).
data_gene <- unique(
  scan("../expression/gene_high_1.txt", what = "character", sep = "\t")
)

# Names of the gene sets; the members of set <name> are read from "<name>.txt".
gene_set <- scan("gene_set.txt", what = "character", sep = "\t")

# Only the first eight gene sets are analysed. Fail early with a clear message
# instead of later trying to read a file for a missing (NA) set name.
if (length(gene_set) < 8) {
  stop("gene_set.txt must list at least eight gene sets", call. = FALSE)
}

gene_set <- gene_set[1:8]

# Read every gene set exactly once and keep only its background genes, rather
# than re-reading the same files for every pair inside the loops.
gene_set_member <- lapply(gene_set, function(set_name) {
  set_gene <- scan(paste0(set_name, ".txt"), what = "character", sep = "\t")
  intersect(data_gene, set_gene)
})

# For each set i, write "overlap_<i>.txt" with one row per set j:
#   column 1: one-sided Fisher exact p-value for the overlap (NA when i == j)
#   column 2: number of background genes shared by sets i and j
for (i in seq_along(gene_set)) {
  file_name <- paste0("overlap_", i, ".txt")

  data_gene_1 <- gene_set_member[[i]]

  data_p_n <- matrix(NA_real_, nrow = length(gene_set), ncol = 2)

  for (j in seq_along(gene_set)) {
    data_gene_2 <- gene_set_member[[j]]

    data_n <- length(intersect(data_gene_1, data_gene_2))

    data_p_n[j, 2] <- data_n

    # A set compared with itself is not tested; its p-value stays NA.
    if (i != j) {
      data_union <- union(data_gene_1, data_gene_2)

      # 2 x 2 table over the background, filled row by row:
      #   in both sets | only in set i
      #   only in set j | in neither set
      data_intersect_union <- matrix(
        c(
          data_n, length(data_gene_1) - data_n,
          length(data_gene_2) - data_n, length(data_gene) - length(data_union)
        ),
        nrow = 2,
        ncol = 2,
        byrow = TRUE
      )

      # alternative = "greater" tests for more overlap than expected by chance.
      data_test <- fisher.test(data_intersect_union, alternative = "greater")

      data_p_n[j, 1] <- data_test$p.value
    }
  }

  # Written in one call without append = TRUE, so rows left over from an
  # earlier run of this script can never accumulate in the file.
  write.table(
    data_p_n,
    file = file_name,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = FALSE
  )
}

# Combine the per-set files "overlap_<i>.txt" (column 1: p-value, column 2:
# overlap size) into two square tables in which column i comes from set i.
# Values are read as character so they are copied through exactly as written.
n_set <- length(gene_set)

data_overlap <- lapply(seq_len(n_set), function(i) {
  file_name <- paste0("overlap_", i, ".txt")

  data_overlap_i <- read.table(file_name, sep = "\t", colClasses = "character")

  # Each file must hold exactly one row per gene set; anything else (e.g. rows
  # left over from an earlier run) would silently give a wrong-shaped table.
  if (nrow(data_overlap_i) != n_set) {
    stop(
      file_name, " has ", nrow(data_overlap_i), " rows, expected ", n_set,
      call. = FALSE
    )
  }

  data_overlap_i
})

names(data_overlap) <- paste0("overlap_", seq_len(n_set))

# p-values: first column of every per-set file, side by side.
data_overlap_p <- data.frame(lapply(data_overlap, function(x) x[, 1]))

write.table(
  data_overlap_p,
  file = "overlap_p.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

# Overlap sizes: second column of every per-set file, side by side.
data_overlap_n <- data.frame(lapply(data_overlap, function(x) x[, 2]))

write.table(
  data_overlap_n,
  file = "overlap_n.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

# Derive two presentations of the p-value table stored in "overlap_p.txt":
#   overlap_p_1.txt: -log10(p)
#   overlap_p_2.txt: p rounded to two significant digits
# The table is read once and both outputs are computed from that copy.
p_value <- read.table("overlap_p.txt", sep = "\t")

write.table(
  -log10(p_value),
  file = "overlap_p_1.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

data_overlap_p <- signif(p_value, digits = 2)

write.table(
  data_overlap_p,
  file = "overlap_p_2.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)
