## Co-expression enrichment analysis of NDD genes in six major cell types by controlling for gene set size.

## Cell type names; each one is also the name of a sub-directory of
## ../expression/ that holds that cell type's expression table.
cell_type <- scan("../expression/sample_id_9.txt", what = "character", sep = "\t")

## Number of genes drawn in every random sub-sample, indexed by the position
## of the cell type in `cell_type`.
data_set_size <- scan("../gene_set/set_size_1.txt", sep = "\t")

## data_set_size[i] is used for cell type i, so one entry per cell type is needed.
stopifnot(length(data_set_size) >= length(cell_type))

## The list of gene set names does not depend on the cell type: read it once.
gene_set <- scan("../gene_set/gene_set.txt", what = "character", sep = "\t")

for (i in seq_along(cell_type))
{
    file_name <- paste0("../expression/", cell_type[i], "/GSE_8.txt")

    ## Column 1 holds the gene identifiers, the remaining columns the
    ## expression values.
    data_expression <- read.table(file_name, sep = "\t")

    data_gene <- data_expression[, 1]

    ## Transpose so that genes are columns: cor() correlates columns, giving a
    ## gene-by-gene matrix whose row/column k corresponds to data_gene[k].
    data_expression_1 <- t(data_expression[, 2:ncol(data_expression)])

    data_coexpression <- cor(data_expression_1, method = "spearman")

    ## Each gene pair once (upper triangle), undefined correlations removed,
    ## strongest correlation first.
    data_coexpression_1 <- data_coexpression[upper.tri(data_coexpression)]

    data_coexpression_1 <- data_coexpression_1[!is.na(data_coexpression_1)]

    data_coexpression_1 <- sort(data_coexpression_1, decreasing = TRUE)

## Calculate co-expression network threshold.

    ## The threshold is the correlation at the top 0.5% position. The position
    ## is kept >= 1 because with fewer than 100 pairs it would round to 0 and
    ## index nothing, which turns every score into NaN.
    data_threshold_position <- max(1, round(length(data_coexpression_1) * 0.005))

    data_threshold <- data_coexpression_1[data_threshold_position]

## Adjust co-expression network density if there are ties of threshold.

    data_adjusted_density <- sum(data_coexpression_1 >= data_threshold) /
        length(data_coexpression_1)

    file_name_1 <- paste0("score_gene_", cell_type[i], ".txt")

    for (j in seq_along(gene_set))
    {
        file_name_2 <- paste0("../gene_set/", gene_set[j], ".txt")

        data_gene_1 <- scan(file_name_2, sep = "\t")

        ## Keep only the genes of this set that are present in the expression data.
        data_gene_1 <- intersect(data_gene, data_gene_1)

        data_score <- numeric(length = 1000)

        ## Re-seeded for every gene set, so each gene set starts from the same
        ## random stream.
        set.seed(100000000)

        for (k in 1:1000)
        {
            ## Sample by index: sample(x, n) would draw from 1:x when x is a
            ## single number, i.e. return values that are not genes. For
            ## longer vectors this yields exactly the same draws as sample().
            data_gene_2 <- data_gene_1[sample.int(length(data_gene_1), data_set_size[i])]

            ## Rows/columns of the correlation matrix belonging to the sampled
            ## genes. Their order is irrelevant because only the set of pairs
            ## among them is counted below.
            data_position_3 <- which(data_gene %in% data_gene_2)

            data_coexpression_3 <- data_coexpression[data_position_3, data_position_3, drop = FALSE]

            data_coexpression_4 <- data_coexpression_3[upper.tri(data_coexpression_3)]

            data_coexpression_4 <- data_coexpression_4[!is.na(data_coexpression_4)]

## Calculate co-expression fold enrichment score.

            ## Fraction of sampled pairs at or above the threshold, relative to
            ## the same fraction over all gene pairs.
            data_score[k] <- sum(data_coexpression_4 >= data_threshold) /
                length(data_coexpression_4) / data_adjusted_density
        }

        ## One row of 1000 scores per gene set. NOTE: append = TRUE also means
        ## that re-running the script adds rows to an already existing file.
        write.table(t(data_score), file = file_name_1, append = TRUE, quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
    }
}
