## Plot co-expression enrichment score of eight NDD gene sets in six major cortical cell types by downsampling.

library(ggplot2)

## Names of the gene sets to plot, read as tab-separated strings.
## Only the first eight are used. head() is used rather than `[1:8]` so that a
## file with fewer than eight names yields a shorter vector instead of being
## padded with NA entries.
gene_set <- scan("../gene_set/gene_set.txt", what = "character", sep = "\t")

gene_set <- head(gene_set, 8)

## Cell-type labels, read as tab-separated strings; they are used to build the
## per-cell-type input file names and as the x-axis categories of the plots.
cell_type <- scan("../expression/sample_id_9.txt", what = "character", sep = "\t")

## Header row of the per-gene-set score tables; the plotting code refers to
## these columns by name.
column_name <- c("Score", "Type")

## For every gene set: gather the co-expression scores of all cell types into
## one Score/Type table, compute a one-sided Fisher's exact test per cell type,
## and save a violin plot annotated with the resulting p-values.
##
## Files read (working directory):
##   score_downsample_<cell type>_005.txt  table; column i belongs to gene set i
##   score_Microglia_005.txt               vector; element i belongs to gene set i
##   density_<cell type>_005.txt           table; row i, columns 1-2 are used
##   background_<cell type>_005.txt        vector; elements 1-2 are used
## Files written (working directory):
##   score_downsample_<gene set>.txt       Score/Type table
##   score_downsample_<gene set>.pdf       violin plot
##
## NOTE(review): the code assumes cell_type[1:5] are the downsampled cell types
## and that position 6 is Microglia (single score, no downsampling); the plot
## colours and p-value labels are likewise laid out for six categories.
## Confirm against the contents of sample_id_9.txt.

## The Microglia scores do not depend on the gene-set index, so read them once.
microglia_score <- scan("score_Microglia_005.txt", sep = "\t")

## One colour per cell type (the unquoted 2 is coerced to the string "2").
violin_colour <- c("cyan3", "darkgreen", "royalblue3", "orange", 2, "hotpink")

## Heights of the p-value labels, staggered so that neighbours do not collide.
label_y <- c(25, 20, 15, 25, 20, 15)

for (i in seq_along(gene_set))
{
    file_name <- paste0("score_downsample_", gene_set[i], ".txt")

    ## Start the table with its header row; data rows are appended below.
    write.table(t(column_name), file = file_name, quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)

    ## Mean score per cell type, used by the enrichment test further down.
    mean_score <- numeric(length = length(cell_type))

    for (j in 1:5)
    {
        file_name_1 <- paste0("score_downsample_", cell_type[j], "_005.txt")

        score_table <- read.table(file_name_1, sep = "\t")

        score_column <- score_table[, i]

        mean_score[j] <- mean(score_column)

        score_rows <- data.frame(score_column, rep(cell_type[j], length(score_column)))

        write.table(score_rows, file = file_name, append = TRUE, quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
    }

    ## Microglia contributes a single score per gene set.
    mean_score[6] <- microglia_score[i]

    microglia_row <- data.frame(microglia_score[i], "Microglia")

    write.table(microglia_row, file = file_name, append = TRUE, quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)

    data_p <- vector(mode = "character", length = length(cell_type))

    for (j in seq_along(cell_type))
    {
        data_density <- read.table(paste0("density_", cell_type[j], "_005.txt"), sep = "\t")

        data_background <- scan(paste0("background_", cell_type[j], "_005.txt"), sep = "\t")

        ## Count for this gene set derived from its mean score, the gene-set
        ## total (column 2) and the background ratio, rounded to an integer.
        ## NOTE(review): presumably the number of co-expressed pairs - confirm.
        set_total <- data_density[i, 2]

        set_count <- round(mean_score[j] * set_total * data_background[1] / data_background[2], digits = 0)

        ## 2 x 2 table: gene set in row 1, background in row 2;
        ## counted in column 1, remainder of the total in column 2.
        data_density_background <- rbind(
            c(set_count, set_total - set_count),
            c(data_background[1], data_background[2] - data_background[1])
        )

        data_test <- fisher.test(data_density_background, alternative = "greater")

        data_p[j] <- paste("p", signif(data_test$p.value, digits = 2), sep = "=")
    }

    ## Read back the table written above so the plot uses exactly what is on disk.
    plot_data <- read.table(file_name, header = TRUE, sep = "\t")

    file_name_4 <- paste0("score_downsample_", gene_set[i], ".pdf")

    data_plot <- ggplot(plot_data, aes(x = factor(Type), y = Score, colour = Type, fill = Type)) +

            geom_violin(trim = FALSE) +

            theme_classic() +

            theme(legend.position = "none") +

            scale_colour_manual(values = violin_colour) +

            scale_fill_manual(values = violin_colour) +

            ## `fun` replaces the deprecated `fun.y` argument.
            stat_summary(fun = mean, geom = "point", colour = 1, size = 3) +

            scale_x_discrete(limits = cell_type) +

            scale_y_continuous(breaks = c(0:5*5), limits = c(-2, 25)) +

            labs(title = gene_set[i], x = "Downsampling (n=68)", y = "Co-expression fold enrichment") +

            theme(axis.text.x = element_text(size = 20, hjust = 1, vjust = 1, angle = 45), axis.text.y = element_text(size = 25)) +

            theme(plot.title = element_text(face = "bold", size = 25), axis.title.x = element_text(size = 25, hjust = 0.5), axis.title.y = element_text(size = 25, vjust = 0.4)) +

            ## annotate() draws each label exactly once; geom_text() with
            ## constant arguments would redraw it for every row of plot_data.
            annotate("text", x = 1:6, y = label_y, label = data_p[1:6], colour = 1, size = 6)

    ## Pass the plot explicitly instead of relying on last_plot().
    ggsave(filename = file_name_4, plot = data_plot, width = 8, height = 8)
}
