## Co-expression enrichment analysis of GO biological process terms in NPCs calculated using proportionality phi_s.

library(dismay)

## Metric names accepted by dismay(); the loop below picks entries by index.

data_metric <- c("pearson", "spearman", "kendall", "bicor", "zi_kendall", "binomial", "MI", "cosine", "jaccard", "euclidean", "canberra", "manhattan", "weighted_rank", "hamming", "dice", "phi_s", "rho_p")

## For the selected metric: build the gene-by-gene co-expression matrix, derive
## a threshold from the top 0.5% of all gene pairs, then score every GO term by
## the fraction of its gene pairs at or above that threshold relative to the
## genome-wide fraction. Index 16 selects "phi_s".

for (i in 16:16)
{
    data_expression <- read.table("../expression/NPCs/GSE_8.txt", sep = "\t")

    ## Column 1 holds the gene identifiers that are matched against the GO gene
    ## lists below; each gene is referred to by its row position from here on.

    data_gene <- data_expression[,1]

    data_position <- seq_along(data_gene)

    data_position_1 <- data.frame(data_gene, data_position)

    ## The remaining columns are transposed so that genes become columns, named
    ## by their position, before being handed to dismay().

    data_expression_1 <- data_expression[,2:ncol(data_expression)]

    data_expression_1 <- t(data_expression_1)

    colnames(data_expression_1) <- data_position

    ## NOTE(review): phi_s is a dissimilarity in its original definition
    ## (smaller = more proportional). The code below treats larger values as
    ## stronger co-expression; confirm that dismay() returns phi_s oriented
    ## that way.

    data_coexpression <- dismay(data_expression_1, metric = data_metric[i])

    ## Every gene pair once (upper triangle), missing values dropped, sorted
    ## from highest to lowest.

    data_coexpression_1 <- data_coexpression[upper.tri(data_coexpression)]

    data_coexpression_1 <- data_coexpression_1[!is.na(data_coexpression_1)]

    data_coexpression_1 <- sort(data_coexpression_1, decreasing = TRUE)

    if (length(data_coexpression_1) == 0)
    {
        stop("No non-missing co-expression values for metric ", data_metric[i], call. = FALSE)
    }

## Calculate co-expression network threshold.

    ## The threshold is the value at the top 0.5% rank. The rank is clamped to
    ## at least 1 so that a small matrix (fewer than 100 pairs) does not index
    ## position 0 and yield an empty threshold.

    data_threshold_position <- length(data_coexpression_1)*0.005

    data_threshold_position <- max(1, round(data_threshold_position, digits = 0))

    data_threshold <- data_coexpression_1[data_threshold_position]

## Adjust co-expression network density if there are ties of threshold.

    data_coexpression_2 <- data_coexpression_1[data_coexpression_1>=data_threshold]

    data_adjusted_density <- length(data_coexpression_2)/length(data_coexpression_1)

    gene_set <- scan("GO_3.txt", what = "character", sep = "\t")

    file_name <- paste0("score_GO_NPCs_", data_metric[i], ".txt")

    ## Scores are collected here and written in one go after the loop, so that
    ## re-running the script replaces the score file instead of appending to
    ## results left over from a previous run.

    data_score_all <- numeric(length(gene_set))

    for (j in seq_along(gene_set))
    {
        ## Gene list of the term lives in ./GO/<term>.txt with ":" replaced by "_".

        file_name_1 <- paste0("./GO/", gene_set[j], ".txt")

        file_name_1 <- gsub(":", "_", file_name_1)

        data_gene_1 <- read.table(file_name_1, sep = "\t")

        ## Map the term's genes to their positions in the expression data; genes
        ## absent from the expression data drop out of the merge.

        data_position_2 <- merge(data_position_1, data_gene_1, by = 1, sort = FALSE)

        ## A gene listed more than once in the term file would otherwise be
        ## indexed repeatedly and contribute duplicate (and self) pairs.

        data_position_3 <- unique(data_position_2[,2])

        data_coexpression_3 <- data_coexpression[data_position_3, data_position_3, drop = FALSE]

        data_coexpression_4 <- data_coexpression_3[upper.tri(data_coexpression_3)]

        data_coexpression_4 <- data_coexpression_4[!is.na(data_coexpression_4)]

## Calculate co-expression fold enrichment score.

        ## Terms with no usable gene pair (fewer than two matched genes, or all
        ## pairs missing) give 0/0 here and are reported as NaN.

        data_coexpression_5 <- data_coexpression_4[data_coexpression_4>=data_threshold]

        data_score <- length(data_coexpression_5)/length(data_coexpression_4)/data_adjusted_density

        data_score_all[j] <- data_score
    }

    ## One score per line, in the same order as the terms in GO_3.txt.

    write.table(data_score_all, file = file_name, quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
}

## Pair every GO term with its phi_s enrichment score, then attach the GO
## annotation. The score file lists one score per line in the order of the
## terms in GO_3.txt.

gene_set <- scan("GO_3.txt", what = "character", sep = "\t")

data_score <- scan("score_GO_NPCs_phi_s.txt", what = "character", sep = "\t")

## data.frame() silently recycles the shorter vector when one length is a whole
## multiple of the other (for example a score file holding the output of more
## than one run), which would pair terms with the wrong scores. Fail loudly
## instead.

if (length(gene_set) != length(data_score))
{
    stop("score_GO_NPCs_phi_s.txt holds ", length(data_score), " scores but GO_3.txt lists ", length(gene_set), " terms", call. = FALSE)
}

data_score_1 <- data.frame(gene_set, data_score)

write.table(data_score_1, file = "score_GO_NPCs_phi_s_1.txt", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)

## Join on the first column of both tables (the GO term identifier). all.y keeps
## every scored term even when it has no row in GO.txt; everything is read as
## character so identifiers and scores are written back unchanged.

data_GO <- read.table("../GO_overlap/GO.txt", sep = "\t", quote = "", colClasses = "character", comment.char = "")

data_score <- read.table("score_GO_NPCs_phi_s_1.txt", sep = "\t", colClasses = "character")

data_score_1 <- merge(data_GO, data_score, by = 1, all.y = TRUE)

write.table(data_score_1, file = "score_GO_NPCs_phi_s_2.txt", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
